[RESEND PATCH v8 11/11] iommu: Remove iommu group changes notifier
The iommu group changes notifer is not referenced in the tree. Remove it to avoid dead code. Suggested-by: Christoph Hellwig Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe --- include/linux/iommu.h | 23 - drivers/iommu/iommu.c | 75 --- 2 files changed, 98 deletions(-) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 77972ef978b5..6ef2df258673 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -407,13 +407,6 @@ static inline const struct iommu_ops *dev_iommu_ops(struct device *dev) return dev->iommu->iommu_dev->ops; } -#define IOMMU_GROUP_NOTIFY_ADD_DEVICE 1 /* Device added */ -#define IOMMU_GROUP_NOTIFY_DEL_DEVICE 2 /* Pre Device removed */ -#define IOMMU_GROUP_NOTIFY_BIND_DRIVER 3 /* Pre Driver bind */ -#define IOMMU_GROUP_NOTIFY_BOUND_DRIVER4 /* Post Driver bind */ -#define IOMMU_GROUP_NOTIFY_UNBIND_DRIVER 5 /* Pre Driver unbind */ -#define IOMMU_GROUP_NOTIFY_UNBOUND_DRIVER 6 /* Post Driver unbind */ - extern int bus_set_iommu(struct bus_type *bus, const struct iommu_ops *ops); extern int bus_iommu_probe(struct bus_type *bus); extern bool iommu_present(struct bus_type *bus); @@ -478,10 +471,6 @@ extern int iommu_group_for_each_dev(struct iommu_group *group, void *data, extern struct iommu_group *iommu_group_get(struct device *dev); extern struct iommu_group *iommu_group_ref_get(struct iommu_group *group); extern void iommu_group_put(struct iommu_group *group); -extern int iommu_group_register_notifier(struct iommu_group *group, -struct notifier_block *nb); -extern int iommu_group_unregister_notifier(struct iommu_group *group, - struct notifier_block *nb); extern int iommu_register_device_fault_handler(struct device *dev, iommu_dev_fault_handler_t handler, void *data); @@ -878,18 +867,6 @@ static inline void iommu_group_put(struct iommu_group *group) { } -static inline int iommu_group_register_notifier(struct iommu_group *group, - struct notifier_block *nb) -{ - return -ENODEV; -} - -static inline int iommu_group_unregister_notifier(struct iommu_group *group, - struct notifier_block *nb) -{ - return 0; -} - static inline int iommu_register_device_fault_handler(struct device *dev, iommu_dev_fault_handler_t handler, diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index eba8e8ccf19d..0c42ece25854 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include @@ -40,7 +39,6 @@ struct iommu_group { struct kobject *devices_kobj; struct list_head devices; struct mutex mutex; - struct blocking_notifier_head notifier; void *iommu_data; void (*iommu_data_release)(void *iommu_data); char *name; @@ -632,7 +630,6 @@ struct iommu_group *iommu_group_alloc(void) mutex_init(>mutex); INIT_LIST_HEAD(>devices); INIT_LIST_HEAD(>entry); - BLOCKING_INIT_NOTIFIER_HEAD(>notifier); ret = ida_simple_get(_group_ida, 0, 0, GFP_KERNEL); if (ret < 0) { @@ -905,10 +902,6 @@ int iommu_group_add_device(struct iommu_group *group, struct device *dev) if (ret) goto err_put_group; - /* Notify any listeners about change to group. */ - blocking_notifier_call_chain(>notifier, -IOMMU_GROUP_NOTIFY_ADD_DEVICE, dev); - trace_add_device_to_group(group->id, dev); dev_info(dev, "Adding to iommu group %d\n", group->id); @@ -950,10 +943,6 @@ void iommu_group_remove_device(struct device *dev) dev_info(dev, "Removing from iommu group %d\n", group->id); - /* Pre-notify listeners that a device is being removed. */ - blocking_notifier_call_chain(>notifier, -IOMMU_GROUP_NOTIFY_DEL_DEVICE, dev); - mutex_lock(>mutex); list_for_each_entry(tmp_device, >devices, list) { if (tmp_device->dev == dev) { @@ -1075,36 +1064,6 @@ void iommu_group_put(struct iommu_group *group) } EXPORT_SYMBOL_GPL(iommu_group_put); -/** - * iommu_group_register_notifier - Register a notifier for group changes - * @group: the group to watch - * @nb: notifier block to signal - * - * This function allows iommu group users to track changes in a group. - * See include/linux/iommu.h for actions sent via this notifier. Caller - * should hold a reference to the group throughout notifier registration. - */ -int iommu_group_register_notifier(struct iommu_group *group, - struct notifier_block *nb) -{ -
[RESEND PATCH v8 10/11] vfio: Remove iommu group notifier
The iommu core and driver core have been enhanced to avoid unsafe driver binding to a live group after iommu_group_set_dma_owner(PRIVATE_USER) has been called. There's no need to register iommu group notifier. This removes the iommu group notifer which contains BUG_ON() and WARN(). Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Acked-by: Alex Williamson --- drivers/vfio/vfio.c | 147 1 file changed, 147 deletions(-) diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index b2f19d17d0c3..0c766384cee0 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -71,7 +71,6 @@ struct vfio_group { struct vfio_container *container; struct list_headdevice_list; struct mutexdevice_lock; - struct notifier_block nb; struct list_headvfio_next; struct list_headcontainer_next; atomic_topened; @@ -274,8 +273,6 @@ void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops) } EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver); -static int vfio_iommu_group_notifier(struct notifier_block *nb, -unsigned long action, void *data); static void vfio_group_get(struct vfio_group *group); /* @@ -395,13 +392,6 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group, goto err_put; } - group->nb.notifier_call = vfio_iommu_group_notifier; - err = iommu_group_register_notifier(iommu_group, >nb); - if (err) { - ret = ERR_PTR(err); - goto err_put; - } - mutex_lock(_lock); /* Did we race creating this group? */ @@ -422,7 +412,6 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group, err_unlock: mutex_unlock(_lock); - iommu_group_unregister_notifier(group->iommu_group, >nb); err_put: put_device(>dev); return ret; @@ -447,7 +436,6 @@ static void vfio_group_put(struct vfio_group *group) cdev_device_del(>cdev, >dev); mutex_unlock(_lock); - iommu_group_unregister_notifier(group->iommu_group, >nb); put_device(>dev); } @@ -503,141 +491,6 @@ static struct vfio_device *vfio_group_get_device(struct vfio_group *group, return NULL; } -/* - * Some drivers, like pci-stub, are only used to prevent other drivers from - * claiming a device and are therefore perfectly legitimate for a user owned - * group. The pci-stub driver has no dependencies on DMA or the IOVA mapping - * of the device, but it does prevent the user from having direct access to - * the device, which is useful in some circumstances. - * - * We also assume that we can include PCI interconnect devices, ie. bridges. - * IOMMU grouping on PCI necessitates that if we lack isolation on a bridge - * then all of the downstream devices will be part of the same IOMMU group as - * the bridge. Thus, if placing the bridge into the user owned IOVA space - * breaks anything, it only does so for user owned devices downstream. Note - * that error notification via MSI can be affected for platforms that handle - * MSI within the same IOVA space as DMA. - */ -static const char * const vfio_driver_allowed[] = { "pci-stub" }; - -static bool vfio_dev_driver_allowed(struct device *dev, - struct device_driver *drv) -{ - if (dev_is_pci(dev)) { - struct pci_dev *pdev = to_pci_dev(dev); - - if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL) - return true; - } - - return match_string(vfio_driver_allowed, - ARRAY_SIZE(vfio_driver_allowed), - drv->name) >= 0; -} - -/* - * A vfio group is viable for use by userspace if all devices are in - * one of the following states: - * - driver-less - * - bound to a vfio driver - * - bound to an otherwise allowed driver - * - a PCI interconnect device - * - * We use two methods to determine whether a device is bound to a vfio - * driver. The first is to test whether the device exists in the vfio - * group. The second is to test if the device exists on the group - * unbound_list, indicating it's in the middle of transitioning from - * a vfio driver to driver-less. - */ -static int vfio_dev_viable(struct device *dev, void *data) -{ - struct vfio_group *group = data; - struct vfio_device *device; - struct device_driver *drv = READ_ONCE(dev->driver); - - if (!drv || vfio_dev_driver_allowed(dev, drv)) - return 0; - - device = vfio_group_get_device(group, dev); - if (device) { - vfio_device_put(device); - return 0; - } - - return -EINVAL; -} - -/* - * Async device support - */ -static int vfio_group_nb_add_dev(struct vfio_group *group, struct
[RESEND PATCH v8 09/11] vfio: Delete the unbound_list
From: Jason Gunthorpe commit 60720a0fc646 ("vfio: Add device tracking during unbind") added the unbound list to plug a problem with KVM where KVM_DEV_VFIO_GROUP_DEL relied on vfio_group_get_external_user() succeeding to return the vfio_group from a group file descriptor. The unbound list allowed vfio_group_get_external_user() to continue to succeed in edge cases. However commit 5d6dee80a1e9 ("vfio: New external user group/file match") deleted the call to vfio_group_get_external_user() during KVM_DEV_VFIO_GROUP_DEL. Instead vfio_external_group_match_file() is used to directly match the file descriptor to the group pointer. This in turn avoids the call down to vfio_dev_viable() during KVM_DEV_VFIO_GROUP_DEL and also avoids the trouble the first commit was trying to fix. There are no other users of vfio_dev_viable() that care about the time after vfio_unregister_group_dev() returns, so simply delete the unbound_list entirely. Reviewed-by: Chaitanya Kulkarni Signed-off-by: Jason Gunthorpe Signed-off-by: Lu Baolu Acked-by: Alex Williamson --- drivers/vfio/vfio.c | 74 ++--- 1 file changed, 2 insertions(+), 72 deletions(-) diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 8a9347f732a5..b2f19d17d0c3 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -62,11 +62,6 @@ struct vfio_container { boolnoiommu; }; -struct vfio_unbound_dev { - struct device *dev; - struct list_headunbound_next; -}; - struct vfio_group { struct device dev; struct cdev cdev; @@ -79,8 +74,6 @@ struct vfio_group { struct notifier_block nb; struct list_headvfio_next; struct list_headcontainer_next; - struct list_headunbound_list; - struct mutexunbound_lock; atomic_topened; wait_queue_head_t container_q; enum vfio_group_typetype; @@ -340,16 +333,8 @@ vfio_group_get_from_iommu(struct iommu_group *iommu_group) static void vfio_group_release(struct device *dev) { struct vfio_group *group = container_of(dev, struct vfio_group, dev); - struct vfio_unbound_dev *unbound, *tmp; - - list_for_each_entry_safe(unbound, tmp, ->unbound_list, unbound_next) { - list_del(>unbound_next); - kfree(unbound); - } mutex_destroy(>device_lock); - mutex_destroy(>unbound_lock); iommu_group_put(group->iommu_group); ida_free(_ida, MINOR(group->dev.devt)); kfree(group); @@ -381,8 +366,6 @@ static struct vfio_group *vfio_group_alloc(struct iommu_group *iommu_group, refcount_set(>users, 1); INIT_LIST_HEAD(>device_list); mutex_init(>device_lock); - INIT_LIST_HEAD(>unbound_list); - mutex_init(>unbound_lock); init_waitqueue_head(>container_q); group->iommu_group = iommu_group; /* put in vfio_group_release() */ @@ -571,19 +554,8 @@ static int vfio_dev_viable(struct device *dev, void *data) struct vfio_group *group = data; struct vfio_device *device; struct device_driver *drv = READ_ONCE(dev->driver); - struct vfio_unbound_dev *unbound; - int ret = -EINVAL; - mutex_lock(>unbound_lock); - list_for_each_entry(unbound, >unbound_list, unbound_next) { - if (dev == unbound->dev) { - ret = 0; - break; - } - } - mutex_unlock(>unbound_lock); - - if (!ret || !drv || vfio_dev_driver_allowed(dev, drv)) + if (!drv || vfio_dev_driver_allowed(dev, drv)) return 0; device = vfio_group_get_device(group, dev); @@ -592,7 +564,7 @@ static int vfio_dev_viable(struct device *dev, void *data) return 0; } - return ret; + return -EINVAL; } /* @@ -634,7 +606,6 @@ static int vfio_iommu_group_notifier(struct notifier_block *nb, { struct vfio_group *group = container_of(nb, struct vfio_group, nb); struct device *dev = data; - struct vfio_unbound_dev *unbound; switch (action) { case IOMMU_GROUP_NOTIFY_ADD_DEVICE: @@ -663,28 +634,6 @@ static int vfio_iommu_group_notifier(struct notifier_block *nb, __func__, iommu_group_id(group->iommu_group), dev->driver->name); break; - case IOMMU_GROUP_NOTIFY_UNBOUND_DRIVER: - dev_dbg(dev, "%s: group %d unbound from driver\n", __func__, - iommu_group_id(group->iommu_group)); - /* -* XXX An unbound device in a live group is ok, but we'd -* really like to avoid the above BUG_ON by preventing other -
[RESEND PATCH v8 08/11] vfio: Remove use of vfio_group_viable()
As DMA ownership is claimed for the iommu group when a VFIO group is added to a VFIO container, the VFIO group viability is guaranteed as long as group->container_users > 0. Remove those unnecessary group viability checks which are only hit when group->container_users is not zero. The only remaining reference is in GROUP_GET_STATUS, which could be called at any time when group fd is valid. Here we just replace the vfio_group_viable() by directly calling IOMMU core to get viability status. Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Acked-by: Alex Williamson --- drivers/vfio/vfio.c | 18 ++ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 56e741cbccce..8a9347f732a5 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -1313,12 +1313,6 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd) return ret; } -static bool vfio_group_viable(struct vfio_group *group) -{ - return (iommu_group_for_each_dev(group->iommu_group, -group, vfio_dev_viable) == 0); -} - static int vfio_group_add_container_user(struct vfio_group *group) { if (!atomic_inc_not_zero(>container_users)) @@ -1328,7 +1322,7 @@ static int vfio_group_add_container_user(struct vfio_group *group) atomic_dec(>container_users); return -EPERM; } - if (!group->container->iommu_driver || !vfio_group_viable(group)) { + if (!group->container->iommu_driver) { atomic_dec(>container_users); return -EINVAL; } @@ -1346,7 +1340,7 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf) int ret = 0; if (0 == atomic_read(>container_users) || - !group->container->iommu_driver || !vfio_group_viable(group)) + !group->container->iommu_driver) return -EINVAL; if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) @@ -1438,11 +1432,11 @@ static long vfio_group_fops_unl_ioctl(struct file *filep, status.flags = 0; - if (vfio_group_viable(group)) - status.flags |= VFIO_GROUP_FLAGS_VIABLE; - if (group->container) - status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET; + status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET | + VFIO_GROUP_FLAGS_VIABLE; + else if (!iommu_group_dma_owner_claimed(group->iommu_group)) + status.flags |= VFIO_GROUP_FLAGS_VIABLE; if (copy_to_user((void __user *)arg, , minsz)) return -EFAULT; -- 2.25.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[RESEND PATCH v8 07/11] vfio: Set DMA ownership for VFIO devices
Claim group dma ownership when an IOMMU group is set to a container, and release the dma ownership once the iommu group is unset from the container. This change disallows some unsafe bridge drivers to bind to non-ACS bridges while devices under them are assigned to user space. This is an intentional enhancement and possibly breaks some existing configurations. The recommendation to such an affected user would be that the previously allowed host bridge driver was unsafe for this use case and to continue to enable assignment of devices within that group, the driver should be unbound from the bridge device or replaced with the pci-stub driver. For any bridge driver, we consider it unsafe if it satisfies any of the following conditions: 1) The bridge driver uses DMA. Calling pci_set_master() or calling any kernel DMA API (dma_map_*() and etc.) is an indicate that the driver is doing DMA. 2) If the bridge driver uses MMIO, it should be tolerant to hostile userspace also touching the same MMIO registers via P2P DMA attacks. If the bridge driver turns out to be a safe one, it could be used as before by setting the driver's .driver_managed_dma field, just like what we have done in the pcieport driver. Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Acked-by: Alex Williamson --- drivers/vfio/fsl-mc/vfio_fsl_mc.c | 1 + drivers/vfio/pci/vfio_pci.c | 1 + drivers/vfio/platform/vfio_amba.c | 1 + drivers/vfio/platform/vfio_platform.c | 1 + drivers/vfio/vfio.c | 10 +- 5 files changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c b/drivers/vfio/fsl-mc/vfio_fsl_mc.c index 6e2e62c6f47a..3feff729f3ce 100644 --- a/drivers/vfio/fsl-mc/vfio_fsl_mc.c +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c @@ -588,6 +588,7 @@ static struct fsl_mc_driver vfio_fsl_mc_driver = { .name = "vfio-fsl-mc", .owner = THIS_MODULE, }, + .driver_managed_dma = true, }; static int __init vfio_fsl_mc_driver_init(void) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 2b047469e02f..58839206d1ca 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -194,6 +194,7 @@ static struct pci_driver vfio_pci_driver = { .remove = vfio_pci_remove, .sriov_configure= vfio_pci_sriov_configure, .err_handler= _pci_core_err_handlers, + .driver_managed_dma = true, }; static void __init vfio_pci_fill_ids(void) diff --git a/drivers/vfio/platform/vfio_amba.c b/drivers/vfio/platform/vfio_amba.c index badfffea14fb..1aaa4f721bd2 100644 --- a/drivers/vfio/platform/vfio_amba.c +++ b/drivers/vfio/platform/vfio_amba.c @@ -95,6 +95,7 @@ static struct amba_driver vfio_amba_driver = { .name = "vfio-amba", .owner = THIS_MODULE, }, + .driver_managed_dma = true, }; module_amba_driver(vfio_amba_driver); diff --git a/drivers/vfio/platform/vfio_platform.c b/drivers/vfio/platform/vfio_platform.c index 68a1c87066d7..04f40c5acfd6 100644 --- a/drivers/vfio/platform/vfio_platform.c +++ b/drivers/vfio/platform/vfio_platform.c @@ -76,6 +76,7 @@ static struct platform_driver vfio_platform_driver = { .driver = { .name = "vfio-platform", }, + .driver_managed_dma = true, }; module_platform_driver(vfio_platform_driver); diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index a4555014bd1e..56e741cbccce 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -1198,6 +1198,8 @@ static void __vfio_group_unset_container(struct vfio_group *group) driver->ops->detach_group(container->iommu_data, group->iommu_group); + iommu_group_release_dma_owner(group->iommu_group); + group->container = NULL; wake_up(>container_q); list_del(>container_next); @@ -1282,13 +1284,19 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd) goto unlock_out; } + ret = iommu_group_claim_dma_owner(group->iommu_group, f.file); + if (ret) + goto unlock_out; + driver = container->iommu_driver; if (driver) { ret = driver->ops->attach_group(container->iommu_data, group->iommu_group, group->type); - if (ret) + if (ret) { + iommu_group_release_dma_owner(group->iommu_group); goto unlock_out; + } } group->container = container; -- 2.25.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[RESEND PATCH v8 03/11] amba: Stop sharing platform_dma_configure()
Stop sharing platform_dma_configure() helper as they are about to have their own bus dma_configure callbacks. Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe --- include/linux/platform_device.h | 2 -- drivers/amba/bus.c | 19 ++- drivers/base/platform.c | 3 +-- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 7c96f169d274..17fde717df68 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -328,8 +328,6 @@ extern int platform_pm_restore(struct device *dev); #define platform_pm_restoreNULL #endif -extern int platform_dma_configure(struct device *dev); - #ifdef CONFIG_PM_SLEEP #define USE_PLATFORM_PM_SLEEP_OPS \ .suspend = platform_pm_suspend, \ diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c index d3bd14aaabf6..76b52bd2c2a4 100644 --- a/drivers/amba/bus.c +++ b/drivers/amba/bus.c @@ -20,6 +20,8 @@ #include #include #include +#include +#include #define to_amba_driver(d) container_of(d, struct amba_driver, drv) @@ -273,6 +275,21 @@ static void amba_shutdown(struct device *dev) drv->shutdown(to_amba_device(dev)); } +static int amba_dma_configure(struct device *dev) +{ + enum dev_dma_attr attr; + int ret = 0; + + if (dev->of_node) { + ret = of_dma_configure(dev, dev->of_node, true); + } else if (has_acpi_companion(dev)) { + attr = acpi_get_dma_attr(to_acpi_device_node(dev->fwnode)); + ret = acpi_dma_configure(dev, attr); + } + + return ret; +} + #ifdef CONFIG_PM /* * Hooks to provide runtime PM of the pclk (bus clock). It is safe to @@ -341,7 +358,7 @@ struct bus_type amba_bustype = { .probe = amba_probe, .remove = amba_remove, .shutdown = amba_shutdown, - .dma_configure = platform_dma_configure, + .dma_configure = amba_dma_configure, .pm = _pm, }; EXPORT_SYMBOL_GPL(amba_bustype); diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 8cc272fd5c99..d7915734d931 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -1454,8 +1454,7 @@ static void platform_shutdown(struct device *_dev) drv->shutdown(dev); } - -int platform_dma_configure(struct device *dev) +static int platform_dma_configure(struct device *dev) { enum dev_dma_attr attr; int ret = 0; -- 2.25.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[RESEND PATCH v8 04/11] bus: platform, amba, fsl-mc, PCI: Add device DMA ownership management
The devices on platform/amba/fsl-mc/PCI buses could be bound to drivers with the device DMA managed by kernel drivers or user-space applications. Unfortunately, multiple devices may be placed in the same IOMMU group because they cannot be isolated from each other. The DMA on these devices must either be entirely under kernel control or userspace control, never a mixture. Otherwise the driver integrity is not guaranteed because they could access each other through the peer-to-peer accesses which by-pass the IOMMU protection. This checks and sets the default DMA mode during driver binding, and cleanups during driver unbinding. In the default mode, the device DMA is managed by the device driver which handles DMA operations through the kernel DMA APIs (see Documentation/core-api/dma-api.rst). For cases where the devices are assigned for userspace control through the userspace driver framework(i.e. VFIO), the drivers(for example, vfio_pci/ vfio_platfrom etc.) may set a new flag (driver_managed_dma) to skip this default setting in the assumption that the drivers know what they are doing with the device DMA. Calling iommu_device_use_default_domain() before {of,acpi}_dma_configure is currently a problem. As things stand, the IOMMU driver ignored the initial iommu_probe_device() call when the device was added, since at that point it had no fwspec yet. In this situation, {of,acpi}_iommu_configure() are retriggering iommu_probe_device() after the IOMMU driver has seen the firmware data via .of_xlate to learn that it actually responsible for the given device. As the result, before that gets fixed, iommu_use_default_domain() goes at the end, and calls arch_teardown_dma_ops() if it fails. Cc: Greg Kroah-Hartman Cc: Bjorn Helgaas Cc: Stuart Yoder Cc: Laurentiu Tudor Signed-off-by: Lu Baolu Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jason Gunthorpe Reviewed-by: Robin Murphy Tested-by: Eric Auger --- include/linux/amba/bus.h| 8 include/linux/fsl/mc.h | 8 include/linux/pci.h | 8 include/linux/platform_device.h | 8 drivers/amba/bus.c | 18 ++ drivers/base/platform.c | 18 ++ drivers/bus/fsl-mc/fsl-mc-bus.c | 24 ++-- drivers/pci/pci-driver.c| 18 ++ 8 files changed, 108 insertions(+), 2 deletions(-) diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h index 6562f543c3e0..2ddce9bcd00e 100644 --- a/include/linux/amba/bus.h +++ b/include/linux/amba/bus.h @@ -79,6 +79,14 @@ struct amba_driver { void(*remove)(struct amba_device *); void(*shutdown)(struct amba_device *); const struct amba_id*id_table; + /* +* For most device drivers, no need to care about this flag as long as +* all DMAs are handled through the kernel DMA API. For some special +* ones, for example VFIO drivers, they know how to manage the DMA +* themselves and set this flag so that the IOMMU layer will allow them +* to setup and manage their own I/O address space. +*/ + bool driver_managed_dma; }; /* diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h index 7b6c42bfb660..27efef8affb1 100644 --- a/include/linux/fsl/mc.h +++ b/include/linux/fsl/mc.h @@ -32,6 +32,13 @@ struct fsl_mc_io; * @shutdown: Function called at shutdown time to quiesce the device * @suspend: Function called when a device is stopped * @resume: Function called when a device is resumed + * @driver_managed_dma: Device driver doesn't use kernel DMA API for DMA. + * For most device drivers, no need to care about this flag + * as long as all DMAs are handled through the kernel DMA API. + * For some special ones, for example VFIO drivers, they know + * how to manage the DMA themselves and set this flag so that + * the IOMMU layer will allow them to setup and manage their + * own I/O address space. * * Generic DPAA device driver object for device drivers that are registered * with a DPRC bus. This structure is to be embedded in each device-specific @@ -45,6 +52,7 @@ struct fsl_mc_driver { void (*shutdown)(struct fsl_mc_device *dev); int (*suspend)(struct fsl_mc_device *dev, pm_message_t state); int (*resume)(struct fsl_mc_device *dev); + bool driver_managed_dma; }; #define to_fsl_mc_driver(_drv) \ diff --git a/include/linux/pci.h b/include/linux/pci.h index 60adf42460ab..b933d2b08d4d 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -895,6 +895,13 @@ struct module; * created once it is bound to the driver. * @driver:Driver model structure. * @dynids:List of dynamically added device IDs. + * @driver_managed_dma: Device driver doesn't use kernel DMA API for DMA. + * For most device drivers, no need to
[RESEND PATCH v8 06/11] PCI: portdrv: Set driver_managed_dma
If a switch lacks ACS P2P Request Redirect, a device below the switch can bypass the IOMMU and DMA directly to other devices below the switch, so all the downstream devices must be in the same IOMMU group as the switch itself. The existing VFIO framework allows the portdrv driver to be bound to the bridge while its downstream devices are assigned to user space. The pci_dma_configure() marks the IOMMU group as containing only devices with kernel drivers that manage DMA. Avoid this default behavior for the portdrv driver in order for compatibility with the current VFIO usage. We achieve this by setting ".driver_managed_dma = true" in pci_driver structure. It is safe because the portdrv driver meets below criteria: - This driver doesn't use DMA, as you can't find any related calls like pci_set_master() or any kernel DMA API (dma_map_*() and etc.). - It doesn't use MMIO as you can't find ioremap() or similar calls. It's tolerant to userspace possibly also touching the same MMIO registers via P2P DMA access. Suggested-by: Jason Gunthorpe Suggested-by: Kevin Tian Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Acked-by: Bjorn Helgaas --- drivers/pci/pcie/portdrv_pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c index 4b8801656ffb..7f8788a970ae 100644 --- a/drivers/pci/pcie/portdrv_pci.c +++ b/drivers/pci/pcie/portdrv_pci.c @@ -202,6 +202,8 @@ static struct pci_driver pcie_portdriver = { .err_handler= _portdrv_err_handler, + .driver_managed_dma = true, + .driver.pm = PCIE_PORTDRV_PM_OPS, }; -- 2.25.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[RESEND PATCH v8 05/11] PCI: pci_stub: Set driver_managed_dma
The current VFIO implementation allows pci-stub driver to be bound to a PCI device with other devices in the same IOMMU group being assigned to userspace. The pci-stub driver has no dependencies on DMA or the IOVA mapping of the device, but it does prevent the user from having direct access to the device, which is useful in some circumstances. The pci_dma_configure() marks the iommu_group as containing only devices with kernel drivers that manage DMA. For compatibility with the VFIO usage, avoid this default behavior for the pci_stub. This allows the pci_stub still able to be used by the admin to block driver binding after applying the DMA ownership to VFIO. Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Acked-by: Bjorn Helgaas --- drivers/pci/pci-stub.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/pci-stub.c b/drivers/pci/pci-stub.c index e408099fea52..d1f4c1ce7bd1 100644 --- a/drivers/pci/pci-stub.c +++ b/drivers/pci/pci-stub.c @@ -36,6 +36,7 @@ static struct pci_driver stub_driver = { .name = "pci-stub", .id_table = NULL, /* only dynamic id's */ .probe = pci_stub_probe, + .driver_managed_dma = true, }; static int __init pci_stub_init(void) -- 2.25.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[RESEND PATCH v8 02/11] driver core: Add dma_cleanup callback in bus_type
The bus_type structure defines dma_configure() callback for bus drivers to configure DMA on the devices. This adds the paired dma_cleanup() callback and calls it during driver unbinding so that bus drivers can do some cleanup work. One use case for this paired DMA callbacks is for the bus driver to check for DMA ownership conflicts during driver binding, where multiple devices belonging to a same IOMMU group (the minimum granularity of isolation and protection) may be assigned to kernel drivers or user space respectively. Without this change, for example, the vfio driver has to listen to a bus BOUND_DRIVER event and then BUG_ON() in case of dma ownership conflict. This leads to bad user experience since careless driver binding operation may crash the system if the admin overlooks the group restriction. Aside from bad design, this leads to a security problem as a root user, even with lockdown=integrity, can force the kernel to BUG. With this change, the bus driver could check and set the DMA ownership in driver binding process and fail on ownership conflicts. The DMA ownership should be released during driver unbinding. Signed-off-by: Lu Baolu Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jason Gunthorpe --- include/linux/device/bus.h | 3 +++ drivers/base/dd.c | 5 + 2 files changed, 8 insertions(+) diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index a039ab809753..d8b29ccd07e5 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -59,6 +59,8 @@ struct fwnode_handle; * bus supports. * @dma_configure: Called to setup DMA configuration on a device on * this bus. + * @dma_cleanup: Called to cleanup DMA configuration on a device on + * this bus. * @pm:Power management operations of this bus, callback the specific * device driver's pm-ops. * @iommu_ops: IOMMU specific operations for this bus, used to attach IOMMU @@ -103,6 +105,7 @@ struct bus_type { int (*num_vf)(struct device *dev); int (*dma_configure)(struct device *dev); + void (*dma_cleanup)(struct device *dev); const struct dev_pm_ops *pm; diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 3fc3b5940bb3..94b7ac9bf459 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -671,6 +671,8 @@ static int really_probe(struct device *dev, struct device_driver *drv) if (dev->bus) blocking_notifier_call_chain(>bus->p->bus_notifier, BUS_NOTIFY_DRIVER_NOT_BOUND, dev); + if (dev->bus && dev->bus->dma_cleanup) + dev->bus->dma_cleanup(dev); pinctrl_bind_failed: device_links_no_driver(dev); device_unbind_cleanup(dev); @@ -1199,6 +1201,9 @@ static void __device_release_driver(struct device *dev, struct device *parent) device_remove(dev); + if (dev->bus && dev->bus->dma_cleanup) + dev->bus->dma_cleanup(dev); + device_links_driver_cleanup(dev); device_unbind_cleanup(dev); -- 2.25.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[RESEND PATCH v8 00/11] Fix BUG_ON in vfio_iommu_group_notifier()
Hi Joerg, This is a resend version of v8 posted here: https://lore.kernel.org/linux-iommu/20220308054421.847385-1-baolu...@linux.intel.com/ as we discussed in this thread: https://lore.kernel.org/linux-iommu/yk%2fq1bgn8pc5h...@8bytes.org/ All patches can be applied perfectly except this one: - [PATCH v8 02/11] driver core: Add dma_cleanup callback in bus_type It conflicts with below refactoring commit: - 4b775aaf1ea99 "driver core: Refactor sysfs and drv/bus remove hooks" The conflict has been fixed in this post. No functional changes in this series. I suppress cc-ing this series to all v8 reviewers in order to avoid spam. Please consider it for your iommu tree. Best regards, baolu Change log: - v8 and before: - Please refer to v8 post for all the change history. - v8-resend - Rebase the series on top of v5.18-rc3. - Add Reviewed-by's granted by Robin. - Add a Tested-by granted by Eric. Jason Gunthorpe (1): vfio: Delete the unbound_list Lu Baolu (10): iommu: Add DMA ownership management interfaces driver core: Add dma_cleanup callback in bus_type amba: Stop sharing platform_dma_configure() bus: platform,amba,fsl-mc,PCI: Add device DMA ownership management PCI: pci_stub: Set driver_managed_dma PCI: portdrv: Set driver_managed_dma vfio: Set DMA ownership for VFIO devices vfio: Remove use of vfio_group_viable() vfio: Remove iommu group notifier iommu: Remove iommu group changes notifier include/linux/amba/bus.h | 8 + include/linux/device/bus.h| 3 + include/linux/fsl/mc.h| 8 + include/linux/iommu.h | 54 +++--- include/linux/pci.h | 8 + include/linux/platform_device.h | 10 +- drivers/amba/bus.c| 37 +++- drivers/base/dd.c | 5 + drivers/base/platform.c | 21 ++- drivers/bus/fsl-mc/fsl-mc-bus.c | 24 ++- drivers/iommu/iommu.c | 228 drivers/pci/pci-driver.c | 18 ++ drivers/pci/pci-stub.c| 1 + drivers/pci/pcie/portdrv_pci.c| 2 + drivers/vfio/fsl-mc/vfio_fsl_mc.c | 1 + drivers/vfio/pci/vfio_pci.c | 1 + drivers/vfio/platform/vfio_amba.c | 1 + drivers/vfio/platform/vfio_platform.c | 1 + drivers/vfio/vfio.c | 245 ++ 19 files changed, 338 insertions(+), 338 deletions(-) -- 2.25.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[RESEND PATCH v8 01/11] iommu: Add DMA ownership management interfaces
Multiple devices may be placed in the same IOMMU group because they cannot be isolated from each other. These devices must either be entirely under kernel control or userspace control, never a mixture. This adds dma ownership management in iommu core and exposes several interfaces for the device drivers and the device userspace assignment framework (i.e. VFIO), so that any conflict between user and kernel controlled dma could be detected at the beginning. The device driver oriented interfaces are, int iommu_device_use_default_domain(struct device *dev); void iommu_device_unuse_default_domain(struct device *dev); By calling iommu_device_use_default_domain(), the device driver tells the iommu layer that the device dma is handled through the kernel DMA APIs. The iommu layer will manage the IOVA and use the default domain for DMA address translation. The device user-space assignment framework oriented interfaces are, int iommu_group_claim_dma_owner(struct iommu_group *group, void *owner); void iommu_group_release_dma_owner(struct iommu_group *group); bool iommu_group_dma_owner_claimed(struct iommu_group *group); The device userspace assignment must be disallowed if the DMA owner claiming interface returns failure. Signed-off-by: Jason Gunthorpe Signed-off-by: Kevin Tian Signed-off-by: Lu Baolu Reviewed-by: Robin Murphy --- include/linux/iommu.h | 31 + drivers/iommu/iommu.c | 153 +- 2 files changed, 181 insertions(+), 3 deletions(-) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 9208eca4b0d1..77972ef978b5 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -675,6 +675,13 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, void iommu_sva_unbind_device(struct iommu_sva *handle); u32 iommu_sva_get_pasid(struct iommu_sva *handle); +int iommu_device_use_default_domain(struct device *dev); +void iommu_device_unuse_default_domain(struct device *dev); + +int iommu_group_claim_dma_owner(struct iommu_group *group, void *owner); +void iommu_group_release_dma_owner(struct iommu_group *group); +bool iommu_group_dma_owner_claimed(struct iommu_group *group); + #else /* CONFIG_IOMMU_API */ struct iommu_ops {}; @@ -1031,6 +1038,30 @@ static inline struct iommu_fwspec *dev_iommu_fwspec_get(struct device *dev) { return NULL; } + +static inline int iommu_device_use_default_domain(struct device *dev) +{ + return 0; +} + +static inline void iommu_device_unuse_default_domain(struct device *dev) +{ +} + +static inline int +iommu_group_claim_dma_owner(struct iommu_group *group, void *owner) +{ + return -ENODEV; +} + +static inline void iommu_group_release_dma_owner(struct iommu_group *group) +{ +} + +static inline bool iommu_group_dma_owner_claimed(struct iommu_group *group) +{ + return false; +} #endif /* CONFIG_IOMMU_API */ /** diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index f2c45b85b9fc..eba8e8ccf19d 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -48,6 +48,8 @@ struct iommu_group { struct iommu_domain *default_domain; struct iommu_domain *domain; struct list_head entry; + unsigned int owner_cnt; + void *owner; }; struct group_device { @@ -294,7 +296,11 @@ int iommu_probe_device(struct device *dev) mutex_lock(>mutex); iommu_alloc_default_domain(group, dev); - if (group->default_domain) { + /* +* If device joined an existing group which has been claimed, don't +* attach the default domain. +*/ + if (group->default_domain && !group->owner) { ret = __iommu_attach_device(group->default_domain, dev); if (ret) { mutex_unlock(>mutex); @@ -2109,7 +2115,7 @@ static int __iommu_attach_group(struct iommu_domain *domain, { int ret; - if (group->default_domain && group->domain != group->default_domain) + if (group->domain && group->domain != group->default_domain) return -EBUSY; ret = __iommu_group_for_each_dev(group, domain, @@ -2146,7 +2152,11 @@ static void __iommu_detach_group(struct iommu_domain *domain, { int ret; - if (!group->default_domain) { + /* +* If the group has been claimed already, do not re-attach the default +* domain. +*/ + if (!group->default_domain || group->owner) { __iommu_group_for_each_dev(group, domain, iommu_group_do_detach_device); group->domain = NULL; @@ -3095,3 +3105,140 @@ static ssize_t iommu_group_store_type(struct iommu_group *group, return ret; } + +/** + * iommu_device_use_default_domain() - Device driver wants to handle device + * DMA through the kernel DMA API. + * @dev:
Re: [PATCH 03/25] dma-direct: take dma-ranges/offsets into account in resource mapping
Hello Robin. Sorry for the delayed answer. My comments are below. On Thu, Mar 24, 2022 at 11:30:38AM +, Robin Murphy wrote: > On 2022-03-24 01:48, Serge Semin wrote: > > A basic device-specific linear memory mapping was introduced back in > > commit ("dma: Take into account dma_pfn_offset") as a single-valued offset > > preserved in the device.dma_pfn_offset field, which was initialized for > > instance by means of the "dma-ranges" DT property. Afterwards the > > functionality was extended to support more than one device-specific region > > defined in the device.dma_range_map list of maps. But all of these > > improvements concerned a single pointer, page or sg DMA-mapping methods, > > while the system resource mapping function turned to miss the > > corresponding modification. Thus the dma_direct_map_resource() method now > > just casts the CPU physical address to the device DMA address with no > > dma-ranges-based mapping taking into account, which is obviously wrong. > > Let's fix it by using the phys_to_dma_direct() method to get the > > device-specific bus address from the passed memory resource for the case > > of the directly mapped DMA. > > It may not have been well-documented at the time, but this was largely > intentional. The assumption based on known systems was that where > dma_pfn_offset existed, it would *not* apply to peer MMIO addresses. Well, I'd say it wasn't documented or even discussed at all. At least after a pretty much comprehensive retrospective research I failed to find any note about the reason of having all the dma_direct_map*() methods converted to supporting the dma_pfn_offset/dma_range_map ranges, but leaving the dma_direct_map_resource() method out of that conversion. Neither it is immediately inferable from the method usage and its prototype that it is supposed to be utilized for the DMA memory addresses, not the CPU one. > > For instance, DTs for TI Keystone 2 platforms only describe an offset for > RAM: > > dma-ranges = <0x8000 0x8 0x 0x8000>; > > but a DMA controller might also want to access something in the MMIO range > 0x0-0x7fff, of which it still has an identical non-offset view. If a > driver was previously using dma_map_resource() for that, it would now start > getting DMA_MAPPING_ERROR because the dma_range_map exists but doesn't > describe the MMIO region. I agree that in hindsight it's not an ideal > situation, but it's how things have ended up, so at this point I'm wary of > making potentially-breaking changes. Hmm, what if the driver was previously using for instance the dma_direct_map_sg() method for it? Following this logic you would have needed to decline the whole dma_pfn_offset/dma_range_map ranges support, since the dma_direct_map_sg(), dma_direct_map_page(), dma_direct_alloc*() methods do take the offsets into account. What we can see now is that the same physical address will be differently mapped by the dma_map_resource() and, for instance, dma_map_sg() methods. All of these methods expect to have the "phys_addr_t" address passed, which is the CPU address, not the DMA one. Doesn't that look erroneous? IIUC in accordance with the common kernel definition the "resource" suffix indicates the CPU-visible address (like struct resource range), not the DMA address. No matter whether it's used to describe the RAM or MMIO range. AFAICS the dma_range_map just defines the offset-based DMA-to-CPU mapping for the particular bus/device. If the device driver already knows the DMA address why does it need to map it at all? I see some contradiction here. > > May I ask what exactly your setup looks like, if you have a DMA controller > with an offset view of its "own" MMIO space? I don't have such. But what I see here is either the wrong dma_direct_map_resource() implementation or a redundant mapping performed in some platforms/DMA-device drivers. Indeed judging by the dma_map_resource() method declaration it expects to have the CPU-address passed, which will be mapped in accordance with the "dma-ranges"-based DMA-to-CPU memory mapping in the same way as the rest of the dma_direct-family methods. If DMA address is already known then it is supposed to be used as-is with no any additional remapping procedure performed. The last but not least regarding the DMA controllers and the dma_map_resource() usage. The dma_slave_config structure was converted to having the CPU-physical src/dst address specified in commit 9575632052ba ("dmaengine: make slave address physical"). So the DMA client drivers now have to set the slave source and destination addresses defined in the CPU address space, while the DMA engine driver needs to map it in accordance with the platform/device specific configs. To sum up as I see it the problem is in the dma_map_resource() semantics still exist. The semantic isn't documented in any way while its implementation looks confusing. You say that the method expects to have the DMA address passed, but at the
Re: [PATCH RFC 07/12] iommufd: Data structure to provide IOVA to PFN mapping
On 2022/4/13 22:49, Yi Liu wrote: On 2022/4/13 22:36, Jason Gunthorpe wrote: On Wed, Apr 13, 2022 at 10:02:58PM +0800, Yi Liu wrote: +/** + * iopt_unmap_iova() - Remove a range of iova + * @iopt: io_pagetable to act on + * @iova: Starting iova to unmap + * @length: Number of bytes to unmap + * + * The requested range must exactly match an existing range. + * Splitting/truncating IOVA mappings is not allowed. + */ +int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova, + unsigned long length) +{ + struct iopt_pages *pages; + struct iopt_area *area; + unsigned long iova_end; + int rc; + + if (!length) + return -EINVAL; + + if (check_add_overflow(iova, length - 1, _end)) + return -EOVERFLOW; + + down_read(>domains_rwsem); + down_write(>iova_rwsem); + area = iopt_find_exact_area(iopt, iova, iova_end); when testing vIOMMU with Qemu using iommufd, I hit a problem as log #3 shows. Qemu failed when trying to do map due to an IOVA still in use. After debugging, the 0xf000 IOVA is mapped but not unmapped. But per log #2, Qemu has issued unmap with a larger range (0xff00 - 0x1) which includes the 0xf000. But iopt_find_exact_area() doesn't find any area. So 0xf000 is not unmapped. Is this correct? Same test passed with vfio iommu type1 driver. any idea? There are a couple of good reasons why the iopt_unmap_iova() should proccess any contiguous range of fully contained areas, so I would consider this something worth fixing. can you send a small patch and test case and I'll fold it in? sure. just spotted it, so haven't got fix patch yet. I may work on it tomorrow. Hi Jason, Got below patch for it. Also pushed to the exploration branch. https://github.com/luxis1999/iommufd/commit/d764f3288de0fd52c578684788a437701ec31b2d From 22a758c401a1c7f6656625013bb87204c9ea65fe Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Sun, 17 Apr 2022 07:39:03 -0700 Subject: [PATCH] iommufd/io_pagetable: Support unmap fully contained areas Changes: - return the unmapped bytes to caller - supports unmap fully containerd contiguous areas - add a test case in selftest Signed-off-by: Yi Liu --- drivers/iommu/iommufd/io_pagetable.c| 90 - drivers/iommu/iommufd/ioas.c| 8 ++- drivers/iommu/iommufd/iommufd_private.h | 4 +- drivers/iommu/iommufd/vfio_compat.c | 8 ++- include/uapi/linux/iommufd.h| 2 +- tools/testing/selftests/iommu/iommufd.c | 40 +++ 6 files changed, 99 insertions(+), 53 deletions(-) diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c index f9f3b06946bf..5142f797a812 100644 --- a/drivers/iommu/iommufd/io_pagetable.c +++ b/drivers/iommu/iommufd/io_pagetable.c @@ -315,61 +315,26 @@ static int __iopt_unmap_iova(struct io_pagetable *iopt, struct iopt_area *area, return 0; } -/** - * iopt_unmap_iova() - Remove a range of iova - * @iopt: io_pagetable to act on - * @iova: Starting iova to unmap - * @length: Number of bytes to unmap - * - * The requested range must exactly match an existing range. - * Splitting/truncating IOVA mappings is not allowed. - */ -int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova, - unsigned long length) -{ - struct iopt_pages *pages; - struct iopt_area *area; - unsigned long iova_end; - int rc; - - if (!length) - return -EINVAL; - - if (check_add_overflow(iova, length - 1, _end)) - return -EOVERFLOW; - - down_read(>domains_rwsem); - down_write(>iova_rwsem); - area = iopt_find_exact_area(iopt, iova, iova_end); - if (!area) { - up_write(>iova_rwsem); - up_read(>domains_rwsem); - return -ENOENT; - } - pages = area->pages; - area->pages = NULL; - up_write(>iova_rwsem); - - rc = __iopt_unmap_iova(iopt, area, pages); - up_read(>domains_rwsem); - return rc; -} - -int iopt_unmap_all(struct io_pagetable *iopt) +static int __iopt_unmap_iova_range(struct io_pagetable *iopt, + unsigned long start, + unsigned long end, + unsigned long *unmapped) { struct iopt_area *area; + unsigned long unmapped_bytes = 0; int rc; down_read(>domains_rwsem); down_write(>iova_rwsem); - while ((area = iopt_area_iter_first(iopt, 0, ULONG_MAX))) { + while ((area = iopt_area_iter_first(iopt, start, end))) { struct iopt_pages *pages; - /* Userspace should not race unmap all and map */ - if (!area->pages) { - rc = -EBUSY; + if (!area->pages || iopt_area_iova(area) < start || + iopt_area_last_iova(area) > end) { + rc = -ENOENT; goto
[Patch v1] iommu: arm-smmu: disable large page mappings for Nvidia arm-smmu
Tegra194 and Tegra234 SoCs have the erratum that causes walk cache entries to not be invalidated correctly. The problem is that the walk cache index generated for IOVA is not same across translation and invalidation requests. This is leading to page faults when PMD entry is released during unmap and populated with new PTE table during subsequent map request. Disabling large page mappings avoids the release of PMD entry and avoid translations seeing stale PMD entry in walk cache. Fix this by limiting the page mappings to PAGE_SIZE for Tegra194 and Tegra234 devices. This is recommended fix from Tegra hardware design team. Co-developed-by: Pritesh Raithatha Signed-off-by: Pritesh Raithatha Signed-off-by: Ashish Mhetre --- drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c | 23 drivers/iommu/arm/arm-smmu/arm-smmu.c| 3 +++ drivers/iommu/arm/arm-smmu/arm-smmu.h| 1 + 3 files changed, 27 insertions(+) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c index 01e9b50b10a1..b7a3d06da2f4 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c @@ -258,6 +258,27 @@ static void nvidia_smmu_probe_finalize(struct arm_smmu_device *smmu, struct devi dev_name(dev), err); } +static void nvidia_smmu_cfg_pgsize_bitmap(struct arm_smmu_device *smmu) +{ + const struct device_node *np = smmu->dev->of_node; + + /* +* Tegra194 and Tegra234 SoCs have the erratum that causes walk cache +* entries to not be invalidated correctly. The problem is that the walk +* cache index generated for IOVA is not same across translation and +* invalidation requests. This is leading to page faults when PMD entry +* is released during unmap and populated with new PTE table during +* subsequent map request. Disabling large page mappings avoids the +* release of PMD entry and avoid translations seeing stale PMD entry in +* walk cache. +* Fix this by limiting the page mappings to PAGE_SIZE on Tegra194 and +* Tegra234. +*/ + if (of_device_is_compatible(np, "nvidia,tegra234-smmu") || + of_device_is_compatible(np, "nvidia,tegra194-smmu")) + smmu->pgsize_bitmap = PAGE_SIZE; +} + static const struct arm_smmu_impl nvidia_smmu_impl = { .read_reg = nvidia_smmu_read_reg, .write_reg = nvidia_smmu_write_reg, @@ -268,10 +289,12 @@ static const struct arm_smmu_impl nvidia_smmu_impl = { .global_fault = nvidia_smmu_global_fault, .context_fault = nvidia_smmu_context_fault, .probe_finalize = nvidia_smmu_probe_finalize, + .cfg_pgsize_bitmap = nvidia_smmu_cfg_pgsize_bitmap, }; static const struct arm_smmu_impl nvidia_smmu_single_impl = { .probe_finalize = nvidia_smmu_probe_finalize, + .cfg_pgsize_bitmap = nvidia_smmu_cfg_pgsize_bitmap, }; struct arm_smmu_device *nvidia_smmu_impl_init(struct arm_smmu_device *smmu) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index 568cce590ccc..3692a19a588a 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -1872,6 +1872,9 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K) smmu->pgsize_bitmap |= SZ_64K | SZ_512M; + if (smmu->impl && smmu->impl->cfg_pgsize_bitmap) + smmu->impl->cfg_pgsize_bitmap(smmu); + if (arm_smmu_ops.pgsize_bitmap == -1UL) arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap; else diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h index 2b9b42fb6f30..5d9b03024969 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h @@ -442,6 +442,7 @@ struct arm_smmu_impl { void (*write_s2cr)(struct arm_smmu_device *smmu, int idx); void (*write_sctlr)(struct arm_smmu_device *smmu, int idx, u32 reg); void (*probe_finalize)(struct arm_smmu_device *smmu, struct device *dev); + void (*cfg_pgsize_bitmap)(struct arm_smmu_device *smmu); }; #define INVALID_SMENDX -1 -- 2.17.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu