[RESEND PATCH v8 11/11] iommu: Remove iommu group changes notifier

2022-04-17 Thread Lu Baolu
The iommu group changes notifier is not referenced in the tree. Remove it
to avoid dead code.

Suggested-by: Christoph Hellwig 
Signed-off-by: Lu Baolu 
Reviewed-by: Jason Gunthorpe 
---
 include/linux/iommu.h | 23 -
 drivers/iommu/iommu.c | 75 ---
 2 files changed, 98 deletions(-)

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 77972ef978b5..6ef2df258673 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -407,13 +407,6 @@ static inline const struct iommu_ops *dev_iommu_ops(struct 
device *dev)
return dev->iommu->iommu_dev->ops;
 }
 
-#define IOMMU_GROUP_NOTIFY_ADD_DEVICE  1 /* Device added */
-#define IOMMU_GROUP_NOTIFY_DEL_DEVICE  2 /* Pre Device removed */
-#define IOMMU_GROUP_NOTIFY_BIND_DRIVER 3 /* Pre Driver bind */
-#define IOMMU_GROUP_NOTIFY_BOUND_DRIVER4 /* Post Driver bind */
-#define IOMMU_GROUP_NOTIFY_UNBIND_DRIVER   5 /* Pre Driver unbind */
-#define IOMMU_GROUP_NOTIFY_UNBOUND_DRIVER  6 /* Post Driver unbind */
-
 extern int bus_set_iommu(struct bus_type *bus, const struct iommu_ops *ops);
 extern int bus_iommu_probe(struct bus_type *bus);
 extern bool iommu_present(struct bus_type *bus);
@@ -478,10 +471,6 @@ extern int iommu_group_for_each_dev(struct iommu_group 
*group, void *data,
 extern struct iommu_group *iommu_group_get(struct device *dev);
 extern struct iommu_group *iommu_group_ref_get(struct iommu_group *group);
 extern void iommu_group_put(struct iommu_group *group);
-extern int iommu_group_register_notifier(struct iommu_group *group,
-struct notifier_block *nb);
-extern int iommu_group_unregister_notifier(struct iommu_group *group,
-  struct notifier_block *nb);
 extern int iommu_register_device_fault_handler(struct device *dev,
iommu_dev_fault_handler_t handler,
void *data);
@@ -878,18 +867,6 @@ static inline void iommu_group_put(struct iommu_group 
*group)
 {
 }
 
-static inline int iommu_group_register_notifier(struct iommu_group *group,
-   struct notifier_block *nb)
-{
-   return -ENODEV;
-}
-
-static inline int iommu_group_unregister_notifier(struct iommu_group *group,
- struct notifier_block *nb)
-{
-   return 0;
-}
-
 static inline
 int iommu_register_device_fault_handler(struct device *dev,
iommu_dev_fault_handler_t handler,
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index eba8e8ccf19d..0c42ece25854 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -18,7 +18,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -40,7 +39,6 @@ struct iommu_group {
struct kobject *devices_kobj;
struct list_head devices;
struct mutex mutex;
-   struct blocking_notifier_head notifier;
void *iommu_data;
void (*iommu_data_release)(void *iommu_data);
char *name;
@@ -632,7 +630,6 @@ struct iommu_group *iommu_group_alloc(void)
mutex_init(>mutex);
INIT_LIST_HEAD(>devices);
INIT_LIST_HEAD(>entry);
-   BLOCKING_INIT_NOTIFIER_HEAD(>notifier);
 
ret = ida_simple_get(_group_ida, 0, 0, GFP_KERNEL);
if (ret < 0) {
@@ -905,10 +902,6 @@ int iommu_group_add_device(struct iommu_group *group, 
struct device *dev)
if (ret)
goto err_put_group;
 
-   /* Notify any listeners about change to group. */
-   blocking_notifier_call_chain(>notifier,
-IOMMU_GROUP_NOTIFY_ADD_DEVICE, dev);
-
trace_add_device_to_group(group->id, dev);
 
dev_info(dev, "Adding to iommu group %d\n", group->id);
@@ -950,10 +943,6 @@ void iommu_group_remove_device(struct device *dev)
 
dev_info(dev, "Removing from iommu group %d\n", group->id);
 
-   /* Pre-notify listeners that a device is being removed. */
-   blocking_notifier_call_chain(>notifier,
-IOMMU_GROUP_NOTIFY_DEL_DEVICE, dev);
-
mutex_lock(>mutex);
list_for_each_entry(tmp_device, >devices, list) {
if (tmp_device->dev == dev) {
@@ -1075,36 +1064,6 @@ void iommu_group_put(struct iommu_group *group)
 }
 EXPORT_SYMBOL_GPL(iommu_group_put);
 
-/**
- * iommu_group_register_notifier - Register a notifier for group changes
- * @group: the group to watch
- * @nb: notifier block to signal
- *
- * This function allows iommu group users to track changes in a group.
- * See include/linux/iommu.h for actions sent via this notifier.  Caller
- * should hold a reference to the group throughout notifier registration.
- */
-int iommu_group_register_notifier(struct iommu_group *group,
- struct notifier_block *nb)
-{
-   

[RESEND PATCH v8 10/11] vfio: Remove iommu group notifier

2022-04-17 Thread Lu Baolu
The iommu core and driver core have been enhanced to avoid unsafe driver
binding to a live group after iommu_group_set_dma_owner(PRIVATE_USER)
has been called. There's no need to register iommu group notifier. This
removes the iommu group notifier which contains BUG_ON() and WARN().

Signed-off-by: Lu Baolu 
Reviewed-by: Jason Gunthorpe 
Acked-by: Alex Williamson 
---
 drivers/vfio/vfio.c | 147 
 1 file changed, 147 deletions(-)

diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index b2f19d17d0c3..0c766384cee0 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -71,7 +71,6 @@ struct vfio_group {
struct vfio_container   *container;
struct list_headdevice_list;
struct mutexdevice_lock;
-   struct notifier_block   nb;
struct list_headvfio_next;
struct list_headcontainer_next;
atomic_topened;
@@ -274,8 +273,6 @@ void vfio_unregister_iommu_driver(const struct 
vfio_iommu_driver_ops *ops)
 }
 EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);
 
-static int vfio_iommu_group_notifier(struct notifier_block *nb,
-unsigned long action, void *data);
 static void vfio_group_get(struct vfio_group *group);
 
 /*
@@ -395,13 +392,6 @@ static struct vfio_group *vfio_create_group(struct 
iommu_group *iommu_group,
goto err_put;
}
 
-   group->nb.notifier_call = vfio_iommu_group_notifier;
-   err = iommu_group_register_notifier(iommu_group, >nb);
-   if (err) {
-   ret = ERR_PTR(err);
-   goto err_put;
-   }
-
mutex_lock(_lock);
 
/* Did we race creating this group? */
@@ -422,7 +412,6 @@ static struct vfio_group *vfio_create_group(struct 
iommu_group *iommu_group,
 
 err_unlock:
mutex_unlock(_lock);
-   iommu_group_unregister_notifier(group->iommu_group, >nb);
 err_put:
put_device(>dev);
return ret;
@@ -447,7 +436,6 @@ static void vfio_group_put(struct vfio_group *group)
cdev_device_del(>cdev, >dev);
mutex_unlock(_lock);
 
-   iommu_group_unregister_notifier(group->iommu_group, >nb);
put_device(>dev);
 }
 
@@ -503,141 +491,6 @@ static struct vfio_device *vfio_group_get_device(struct 
vfio_group *group,
return NULL;
 }
 
-/*
- * Some drivers, like pci-stub, are only used to prevent other drivers from
- * claiming a device and are therefore perfectly legitimate for a user owned
- * group.  The pci-stub driver has no dependencies on DMA or the IOVA mapping
- * of the device, but it does prevent the user from having direct access to
- * the device, which is useful in some circumstances.
- *
- * We also assume that we can include PCI interconnect devices, ie. bridges.
- * IOMMU grouping on PCI necessitates that if we lack isolation on a bridge
- * then all of the downstream devices will be part of the same IOMMU group as
- * the bridge.  Thus, if placing the bridge into the user owned IOVA space
- * breaks anything, it only does so for user owned devices downstream.  Note
- * that error notification via MSI can be affected for platforms that handle
- * MSI within the same IOVA space as DMA.
- */
-static const char * const vfio_driver_allowed[] = { "pci-stub" };
-
-static bool vfio_dev_driver_allowed(struct device *dev,
-   struct device_driver *drv)
-{
-   if (dev_is_pci(dev)) {
-   struct pci_dev *pdev = to_pci_dev(dev);
-
-   if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL)
-   return true;
-   }
-
-   return match_string(vfio_driver_allowed,
-   ARRAY_SIZE(vfio_driver_allowed),
-   drv->name) >= 0;
-}
-
-/*
- * A vfio group is viable for use by userspace if all devices are in
- * one of the following states:
- *  - driver-less
- *  - bound to a vfio driver
- *  - bound to an otherwise allowed driver
- *  - a PCI interconnect device
- *
- * We use two methods to determine whether a device is bound to a vfio
- * driver.  The first is to test whether the device exists in the vfio
- * group.  The second is to test if the device exists on the group
- * unbound_list, indicating it's in the middle of transitioning from
- * a vfio driver to driver-less.
- */
-static int vfio_dev_viable(struct device *dev, void *data)
-{
-   struct vfio_group *group = data;
-   struct vfio_device *device;
-   struct device_driver *drv = READ_ONCE(dev->driver);
-
-   if (!drv || vfio_dev_driver_allowed(dev, drv))
-   return 0;
-
-   device = vfio_group_get_device(group, dev);
-   if (device) {
-   vfio_device_put(device);
-   return 0;
-   }
-
-   return -EINVAL;
-}
-
-/*
- * Async device support
- */
-static int vfio_group_nb_add_dev(struct vfio_group *group, struct 

[RESEND PATCH v8 09/11] vfio: Delete the unbound_list

2022-04-17 Thread Lu Baolu
From: Jason Gunthorpe 

commit 60720a0fc646 ("vfio: Add device tracking during unbind") added the
unbound list to plug a problem with KVM where KVM_DEV_VFIO_GROUP_DEL
relied on vfio_group_get_external_user() succeeding to return the
vfio_group from a group file descriptor. The unbound list allowed
vfio_group_get_external_user() to continue to succeed in edge cases.

However commit 5d6dee80a1e9 ("vfio: New external user group/file match")
deleted the call to vfio_group_get_external_user() during
KVM_DEV_VFIO_GROUP_DEL. Instead vfio_external_group_match_file() is used
to directly match the file descriptor to the group pointer.

This in turn avoids the call down to vfio_dev_viable() during
KVM_DEV_VFIO_GROUP_DEL and also avoids the trouble the first commit was
trying to fix.

There are no other users of vfio_dev_viable() that care about the time
after vfio_unregister_group_dev() returns, so simply delete the
unbound_list entirely.

Reviewed-by: Chaitanya Kulkarni 
Signed-off-by: Jason Gunthorpe 
Signed-off-by: Lu Baolu 
Acked-by: Alex Williamson 
---
 drivers/vfio/vfio.c | 74 ++---
 1 file changed, 2 insertions(+), 72 deletions(-)

diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 8a9347f732a5..b2f19d17d0c3 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -62,11 +62,6 @@ struct vfio_container {
boolnoiommu;
 };
 
-struct vfio_unbound_dev {
-   struct device   *dev;
-   struct list_headunbound_next;
-};
-
 struct vfio_group {
struct device   dev;
struct cdev cdev;
@@ -79,8 +74,6 @@ struct vfio_group {
struct notifier_block   nb;
struct list_headvfio_next;
struct list_headcontainer_next;
-   struct list_headunbound_list;
-   struct mutexunbound_lock;
atomic_topened;
wait_queue_head_t   container_q;
enum vfio_group_typetype;
@@ -340,16 +333,8 @@ vfio_group_get_from_iommu(struct iommu_group *iommu_group)
 static void vfio_group_release(struct device *dev)
 {
struct vfio_group *group = container_of(dev, struct vfio_group, dev);
-   struct vfio_unbound_dev *unbound, *tmp;
-
-   list_for_each_entry_safe(unbound, tmp,
->unbound_list, unbound_next) {
-   list_del(>unbound_next);
-   kfree(unbound);
-   }
 
mutex_destroy(>device_lock);
-   mutex_destroy(>unbound_lock);
iommu_group_put(group->iommu_group);
ida_free(_ida, MINOR(group->dev.devt));
kfree(group);
@@ -381,8 +366,6 @@ static struct vfio_group *vfio_group_alloc(struct 
iommu_group *iommu_group,
refcount_set(>users, 1);
INIT_LIST_HEAD(>device_list);
mutex_init(>device_lock);
-   INIT_LIST_HEAD(>unbound_list);
-   mutex_init(>unbound_lock);
init_waitqueue_head(>container_q);
group->iommu_group = iommu_group;
/* put in vfio_group_release() */
@@ -571,19 +554,8 @@ static int vfio_dev_viable(struct device *dev, void *data)
struct vfio_group *group = data;
struct vfio_device *device;
struct device_driver *drv = READ_ONCE(dev->driver);
-   struct vfio_unbound_dev *unbound;
-   int ret = -EINVAL;
 
-   mutex_lock(>unbound_lock);
-   list_for_each_entry(unbound, >unbound_list, unbound_next) {
-   if (dev == unbound->dev) {
-   ret = 0;
-   break;
-   }
-   }
-   mutex_unlock(>unbound_lock);
-
-   if (!ret || !drv || vfio_dev_driver_allowed(dev, drv))
+   if (!drv || vfio_dev_driver_allowed(dev, drv))
return 0;
 
device = vfio_group_get_device(group, dev);
@@ -592,7 +564,7 @@ static int vfio_dev_viable(struct device *dev, void *data)
return 0;
}
 
-   return ret;
+   return -EINVAL;
 }
 
 /*
@@ -634,7 +606,6 @@ static int vfio_iommu_group_notifier(struct notifier_block 
*nb,
 {
struct vfio_group *group = container_of(nb, struct vfio_group, nb);
struct device *dev = data;
-   struct vfio_unbound_dev *unbound;
 
switch (action) {
case IOMMU_GROUP_NOTIFY_ADD_DEVICE:
@@ -663,28 +634,6 @@ static int vfio_iommu_group_notifier(struct notifier_block 
*nb,
__func__, iommu_group_id(group->iommu_group),
dev->driver->name);
break;
-   case IOMMU_GROUP_NOTIFY_UNBOUND_DRIVER:
-   dev_dbg(dev, "%s: group %d unbound from driver\n", __func__,
-   iommu_group_id(group->iommu_group));
-   /*
-* XXX An unbound device in a live group is ok, but we'd
-* really like to avoid the above BUG_ON by preventing other
- 

[RESEND PATCH v8 08/11] vfio: Remove use of vfio_group_viable()

2022-04-17 Thread Lu Baolu
As DMA ownership is claimed for the iommu group when a VFIO group is
added to a VFIO container, the VFIO group viability is guaranteed as long
as group->container_users > 0. Remove those unnecessary group viability
checks which are only hit when group->container_users is not zero.

The only remaining reference is in GROUP_GET_STATUS, which could be called
at any time when group fd is valid. Here we just replace the
vfio_group_viable() by directly calling IOMMU core to get viability status.

Signed-off-by: Lu Baolu 
Reviewed-by: Jason Gunthorpe 
Acked-by: Alex Williamson 
---
 drivers/vfio/vfio.c | 18 ++
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 56e741cbccce..8a9347f732a5 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -1313,12 +1313,6 @@ static int vfio_group_set_container(struct vfio_group 
*group, int container_fd)
return ret;
 }
 
-static bool vfio_group_viable(struct vfio_group *group)
-{
-   return (iommu_group_for_each_dev(group->iommu_group,
-group, vfio_dev_viable) == 0);
-}
-
 static int vfio_group_add_container_user(struct vfio_group *group)
 {
if (!atomic_inc_not_zero(>container_users))
@@ -1328,7 +1322,7 @@ static int vfio_group_add_container_user(struct 
vfio_group *group)
atomic_dec(>container_users);
return -EPERM;
}
-   if (!group->container->iommu_driver || !vfio_group_viable(group)) {
+   if (!group->container->iommu_driver) {
atomic_dec(>container_users);
return -EINVAL;
}
@@ -1346,7 +1340,7 @@ static int vfio_group_get_device_fd(struct vfio_group 
*group, char *buf)
int ret = 0;
 
if (0 == atomic_read(>container_users) ||
-   !group->container->iommu_driver || !vfio_group_viable(group))
+   !group->container->iommu_driver)
return -EINVAL;
 
if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
@@ -1438,11 +1432,11 @@ static long vfio_group_fops_unl_ioctl(struct file 
*filep,
 
status.flags = 0;
 
-   if (vfio_group_viable(group))
-   status.flags |= VFIO_GROUP_FLAGS_VIABLE;
-
if (group->container)
-   status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET;
+   status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET |
+   VFIO_GROUP_FLAGS_VIABLE;
+   else if (!iommu_group_dma_owner_claimed(group->iommu_group))
+   status.flags |= VFIO_GROUP_FLAGS_VIABLE;
 
if (copy_to_user((void __user *)arg, , minsz))
return -EFAULT;
-- 
2.25.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[RESEND PATCH v8 07/11] vfio: Set DMA ownership for VFIO devices

2022-04-17 Thread Lu Baolu
Claim group dma ownership when an IOMMU group is set to a container,
and release the dma ownership once the iommu group is unset from the
container.

This change disallows some unsafe bridge drivers to bind to non-ACS
bridges while devices under them are assigned to user space. This is an
intentional enhancement and possibly breaks some existing
configurations. The recommendation to such an affected user would be
that the previously allowed host bridge driver was unsafe for this use
case and to continue to enable assignment of devices within that group,
the driver should be unbound from the bridge device or replaced with the
pci-stub driver.

For any bridge driver, we consider it unsafe if it satisfies any of the
following conditions:

  1) The bridge driver uses DMA. Calling pci_set_master() or calling any
 kernel DMA API (dma_map_*(), etc.) is an indication that the
 driver is doing DMA.

  2) If the bridge driver uses MMIO, it should be tolerant to hostile
 userspace also touching the same MMIO registers via P2P DMA
 attacks.

If the bridge driver turns out to be a safe one, it could be used as
before by setting the driver's .driver_managed_dma field, just like what
we have done in the pcieport driver.

Signed-off-by: Lu Baolu 
Reviewed-by: Jason Gunthorpe 
Acked-by: Alex Williamson 
---
 drivers/vfio/fsl-mc/vfio_fsl_mc.c |  1 +
 drivers/vfio/pci/vfio_pci.c   |  1 +
 drivers/vfio/platform/vfio_amba.c |  1 +
 drivers/vfio/platform/vfio_platform.c |  1 +
 drivers/vfio/vfio.c   | 10 +-
 5 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c 
b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
index 6e2e62c6f47a..3feff729f3ce 100644
--- a/drivers/vfio/fsl-mc/vfio_fsl_mc.c
+++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
@@ -588,6 +588,7 @@ static struct fsl_mc_driver vfio_fsl_mc_driver = {
.name   = "vfio-fsl-mc",
.owner  = THIS_MODULE,
},
+   .driver_managed_dma = true,
 };
 
 static int __init vfio_fsl_mc_driver_init(void)
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 2b047469e02f..58839206d1ca 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -194,6 +194,7 @@ static struct pci_driver vfio_pci_driver = {
.remove = vfio_pci_remove,
.sriov_configure= vfio_pci_sriov_configure,
.err_handler= _pci_core_err_handlers,
+   .driver_managed_dma = true,
 };
 
 static void __init vfio_pci_fill_ids(void)
diff --git a/drivers/vfio/platform/vfio_amba.c 
b/drivers/vfio/platform/vfio_amba.c
index badfffea14fb..1aaa4f721bd2 100644
--- a/drivers/vfio/platform/vfio_amba.c
+++ b/drivers/vfio/platform/vfio_amba.c
@@ -95,6 +95,7 @@ static struct amba_driver vfio_amba_driver = {
.name = "vfio-amba",
.owner = THIS_MODULE,
},
+   .driver_managed_dma = true,
 };
 
 module_amba_driver(vfio_amba_driver);
diff --git a/drivers/vfio/platform/vfio_platform.c 
b/drivers/vfio/platform/vfio_platform.c
index 68a1c87066d7..04f40c5acfd6 100644
--- a/drivers/vfio/platform/vfio_platform.c
+++ b/drivers/vfio/platform/vfio_platform.c
@@ -76,6 +76,7 @@ static struct platform_driver vfio_platform_driver = {
.driver = {
.name   = "vfio-platform",
},
+   .driver_managed_dma = true,
 };
 
 module_platform_driver(vfio_platform_driver);
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index a4555014bd1e..56e741cbccce 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -1198,6 +1198,8 @@ static void __vfio_group_unset_container(struct 
vfio_group *group)
driver->ops->detach_group(container->iommu_data,
  group->iommu_group);
 
+   iommu_group_release_dma_owner(group->iommu_group);
+
group->container = NULL;
wake_up(>container_q);
list_del(>container_next);
@@ -1282,13 +1284,19 @@ static int vfio_group_set_container(struct vfio_group 
*group, int container_fd)
goto unlock_out;
}
 
+   ret = iommu_group_claim_dma_owner(group->iommu_group, f.file);
+   if (ret)
+   goto unlock_out;
+
driver = container->iommu_driver;
if (driver) {
ret = driver->ops->attach_group(container->iommu_data,
group->iommu_group,
group->type);
-   if (ret)
+   if (ret) {
+   iommu_group_release_dma_owner(group->iommu_group);
goto unlock_out;
+   }
}
 
group->container = container;
-- 
2.25.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[RESEND PATCH v8 03/11] amba: Stop sharing platform_dma_configure()

2022-04-17 Thread Lu Baolu
Stop sharing platform_dma_configure() helper as they are about to have
their own bus dma_configure callbacks.

Signed-off-by: Lu Baolu 
Reviewed-by: Jason Gunthorpe 
---
 include/linux/platform_device.h |  2 --
 drivers/amba/bus.c  | 19 ++-
 drivers/base/platform.c |  3 +--
 3 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h
index 7c96f169d274..17fde717df68 100644
--- a/include/linux/platform_device.h
+++ b/include/linux/platform_device.h
@@ -328,8 +328,6 @@ extern int platform_pm_restore(struct device *dev);
 #define platform_pm_restoreNULL
 #endif
 
-extern int platform_dma_configure(struct device *dev);
-
 #ifdef CONFIG_PM_SLEEP
 #define USE_PLATFORM_PM_SLEEP_OPS \
.suspend = platform_pm_suspend, \
diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c
index d3bd14aaabf6..76b52bd2c2a4 100644
--- a/drivers/amba/bus.c
+++ b/drivers/amba/bus.c
@@ -20,6 +20,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 #define to_amba_driver(d)  container_of(d, struct amba_driver, drv)
 
@@ -273,6 +275,21 @@ static void amba_shutdown(struct device *dev)
drv->shutdown(to_amba_device(dev));
 }
 
+static int amba_dma_configure(struct device *dev)
+{
+   enum dev_dma_attr attr;
+   int ret = 0;
+
+   if (dev->of_node) {
+   ret = of_dma_configure(dev, dev->of_node, true);
+   } else if (has_acpi_companion(dev)) {
+   attr = acpi_get_dma_attr(to_acpi_device_node(dev->fwnode));
+   ret = acpi_dma_configure(dev, attr);
+   }
+
+   return ret;
+}
+
 #ifdef CONFIG_PM
 /*
  * Hooks to provide runtime PM of the pclk (bus clock).  It is safe to
@@ -341,7 +358,7 @@ struct bus_type amba_bustype = {
.probe  = amba_probe,
.remove = amba_remove,
.shutdown   = amba_shutdown,
-   .dma_configure  = platform_dma_configure,
+   .dma_configure  = amba_dma_configure,
.pm = _pm,
 };
 EXPORT_SYMBOL_GPL(amba_bustype);
diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 8cc272fd5c99..d7915734d931 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -1454,8 +1454,7 @@ static void platform_shutdown(struct device *_dev)
drv->shutdown(dev);
 }
 
-
-int platform_dma_configure(struct device *dev)
+static int platform_dma_configure(struct device *dev)
 {
enum dev_dma_attr attr;
int ret = 0;
-- 
2.25.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[RESEND PATCH v8 04/11] bus: platform, amba, fsl-mc, PCI: Add device DMA ownership management

2022-04-17 Thread Lu Baolu
The devices on platform/amba/fsl-mc/PCI buses could be bound to drivers
with the device DMA managed by kernel drivers or user-space applications.
Unfortunately, multiple devices may be placed in the same IOMMU group
because they cannot be isolated from each other. The DMA on these devices
must either be entirely under kernel control or userspace control, never
a mixture. Otherwise the driver integrity is not guaranteed because they
could access each other through the peer-to-peer accesses which by-pass
the IOMMU protection.

This checks and sets the default DMA mode during driver binding, and
cleanups during driver unbinding. In the default mode, the device DMA is
managed by the device driver which handles DMA operations through the
kernel DMA APIs (see Documentation/core-api/dma-api.rst).

For cases where the devices are assigned for userspace control through the
userspace driver framework(i.e. VFIO), the drivers(for example, vfio_pci/
vfio_platform etc.) may set a new flag (driver_managed_dma) to skip this
default setting in the assumption that the drivers know what they are
doing with the device DMA.

Calling iommu_device_use_default_domain() before {of,acpi}_dma_configure
is currently a problem. As things stand, the IOMMU driver ignored the
initial iommu_probe_device() call when the device was added, since at
that point it had no fwspec yet. In this situation,
{of,acpi}_iommu_configure() are retriggering iommu_probe_device() after
the IOMMU driver has seen the firmware data via .of_xlate to learn that
it is actually responsible for the given device. As a result, before
that gets fixed, iommu_device_use_default_domain() goes at the end, and calls
arch_teardown_dma_ops() if it fails.

Cc: Greg Kroah-Hartman 
Cc: Bjorn Helgaas 
Cc: Stuart Yoder 
Cc: Laurentiu Tudor 
Signed-off-by: Lu Baolu 
Reviewed-by: Greg Kroah-Hartman 
Reviewed-by: Jason Gunthorpe 
Reviewed-by: Robin Murphy 
Tested-by: Eric Auger 
---
 include/linux/amba/bus.h|  8 
 include/linux/fsl/mc.h  |  8 
 include/linux/pci.h |  8 
 include/linux/platform_device.h |  8 
 drivers/amba/bus.c  | 18 ++
 drivers/base/platform.c | 18 ++
 drivers/bus/fsl-mc/fsl-mc-bus.c | 24 ++--
 drivers/pci/pci-driver.c| 18 ++
 8 files changed, 108 insertions(+), 2 deletions(-)

diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h
index 6562f543c3e0..2ddce9bcd00e 100644
--- a/include/linux/amba/bus.h
+++ b/include/linux/amba/bus.h
@@ -79,6 +79,14 @@ struct amba_driver {
void(*remove)(struct amba_device *);
void(*shutdown)(struct amba_device *);
const struct amba_id*id_table;
+   /*
+* For most device drivers, no need to care about this flag as long as
+* all DMAs are handled through the kernel DMA API. For some special
+* ones, for example VFIO drivers, they know how to manage the DMA
+* themselves and set this flag so that the IOMMU layer will allow them
+* to setup and manage their own I/O address space.
+*/
+   bool driver_managed_dma;
 };
 
 /*
diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h
index 7b6c42bfb660..27efef8affb1 100644
--- a/include/linux/fsl/mc.h
+++ b/include/linux/fsl/mc.h
@@ -32,6 +32,13 @@ struct fsl_mc_io;
  * @shutdown: Function called at shutdown time to quiesce the device
  * @suspend: Function called when a device is stopped
  * @resume: Function called when a device is resumed
+ * @driver_managed_dma: Device driver doesn't use kernel DMA API for DMA.
+ * For most device drivers, no need to care about this flag
+ * as long as all DMAs are handled through the kernel DMA API.
+ * For some special ones, for example VFIO drivers, they know
+ * how to manage the DMA themselves and set this flag so that
+ * the IOMMU layer will allow them to setup and manage their
+ * own I/O address space.
  *
  * Generic DPAA device driver object for device drivers that are registered
  * with a DPRC bus. This structure is to be embedded in each device-specific
@@ -45,6 +52,7 @@ struct fsl_mc_driver {
void (*shutdown)(struct fsl_mc_device *dev);
int (*suspend)(struct fsl_mc_device *dev, pm_message_t state);
int (*resume)(struct fsl_mc_device *dev);
+   bool driver_managed_dma;
 };
 
 #define to_fsl_mc_driver(_drv) \
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 60adf42460ab..b933d2b08d4d 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -895,6 +895,13 @@ struct module;
  *  created once it is bound to the driver.
  * @driver:Driver model structure.
  * @dynids:List of dynamically added device IDs.
+ * @driver_managed_dma: Device driver doesn't use kernel DMA API for DMA.
+ * For most device drivers, no need to 

[RESEND PATCH v8 06/11] PCI: portdrv: Set driver_managed_dma

2022-04-17 Thread Lu Baolu
If a switch lacks ACS P2P Request Redirect, a device below the switch can
bypass the IOMMU and DMA directly to other devices below the switch, so
all the downstream devices must be in the same IOMMU group as the switch
itself.

The existing VFIO framework allows the portdrv driver to be bound to the
bridge while its downstream devices are assigned to user space. The
pci_dma_configure() marks the IOMMU group as containing only devices
with kernel drivers that manage DMA. Avoid this default behavior for the
portdrv driver in order for compatibility with the current VFIO usage.

We achieve this by setting ".driver_managed_dma = true" in pci_driver
structure. It is safe because the portdrv driver meets below criteria:

- This driver doesn't use DMA, as you can't find any related calls like
  pci_set_master() or any kernel DMA API (dma_map_*(), etc.).
- It doesn't use MMIO as you can't find ioremap() or similar calls. It's
  tolerant to userspace possibly also touching the same MMIO registers
  via P2P DMA access.

Suggested-by: Jason Gunthorpe 
Suggested-by: Kevin Tian 
Signed-off-by: Lu Baolu 
Reviewed-by: Jason Gunthorpe 
Acked-by: Bjorn Helgaas 
---
 drivers/pci/pcie/portdrv_pci.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c
index 4b8801656ffb..7f8788a970ae 100644
--- a/drivers/pci/pcie/portdrv_pci.c
+++ b/drivers/pci/pcie/portdrv_pci.c
@@ -202,6 +202,8 @@ static struct pci_driver pcie_portdriver = {
 
.err_handler= _portdrv_err_handler,
 
+   .driver_managed_dma = true,
+
.driver.pm  = PCIE_PORTDRV_PM_OPS,
 };
 
-- 
2.25.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[RESEND PATCH v8 05/11] PCI: pci_stub: Set driver_managed_dma

2022-04-17 Thread Lu Baolu
The current VFIO implementation allows pci-stub driver to be bound to
a PCI device with other devices in the same IOMMU group being assigned
to userspace. The pci-stub driver has no dependencies on DMA or the
IOVA mapping of the device, but it does prevent the user from having
direct access to the device, which is useful in some circumstances.

The pci_dma_configure() marks the iommu_group as containing only devices
with kernel drivers that manage DMA. For compatibility with the VFIO
usage, avoid this default behavior for the pci_stub. This allows the
pci_stub still able to be used by the admin to block driver binding after
applying the DMA ownership to VFIO.

Signed-off-by: Lu Baolu 
Reviewed-by: Jason Gunthorpe 
Acked-by: Bjorn Helgaas 
---
 drivers/pci/pci-stub.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/pci/pci-stub.c b/drivers/pci/pci-stub.c
index e408099fea52..d1f4c1ce7bd1 100644
--- a/drivers/pci/pci-stub.c
+++ b/drivers/pci/pci-stub.c
@@ -36,6 +36,7 @@ static struct pci_driver stub_driver = {
.name   = "pci-stub",
.id_table   = NULL, /* only dynamic id's */
.probe  = pci_stub_probe,
+   .driver_managed_dma = true,
 };
 
 static int __init pci_stub_init(void)
-- 
2.25.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[RESEND PATCH v8 02/11] driver core: Add dma_cleanup callback in bus_type

2022-04-17 Thread Lu Baolu
The bus_type structure defines dma_configure() callback for bus drivers
to configure DMA on the devices. This adds the paired dma_cleanup()
callback and calls it during driver unbinding so that bus drivers can do
some cleanup work.

One use case for this paired DMA callbacks is for the bus driver to check
for DMA ownership conflicts during driver binding, where multiple devices
belonging to a same IOMMU group (the minimum granularity of isolation and
protection) may be assigned to kernel drivers or user space respectively.

Without this change, for example, the vfio driver has to listen to a bus
BOUND_DRIVER event and then BUG_ON() in case of dma ownership conflict.
This leads to bad user experience since careless driver binding operation
may crash the system if the admin overlooks the group restriction. Aside
from bad design, this leads to a security problem as a root user, even with
lockdown=integrity, can force the kernel to BUG.

With this change, the bus driver could check and set the DMA ownership in
driver binding process and fail on ownership conflicts. The DMA ownership
should be released during driver unbinding.

Signed-off-by: Lu Baolu 
Reviewed-by: Greg Kroah-Hartman 
Reviewed-by: Jason Gunthorpe 
---
 include/linux/device/bus.h | 3 +++
 drivers/base/dd.c  | 5 +
 2 files changed, 8 insertions(+)

diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h
index a039ab809753..d8b29ccd07e5 100644
--- a/include/linux/device/bus.h
+++ b/include/linux/device/bus.h
@@ -59,6 +59,8 @@ struct fwnode_handle;
  * bus supports.
  * @dma_configure: Called to setup DMA configuration on a device on
  * this bus.
+ * @dma_cleanup:   Called to cleanup DMA configuration on a device on
+ * this bus.
  * @pm:Power management operations of this bus, callback the 
specific
  * device driver's pm-ops.
  * @iommu_ops:  IOMMU specific operations for this bus, used to attach IOMMU
@@ -103,6 +105,7 @@ struct bus_type {
int (*num_vf)(struct device *dev);
 
int (*dma_configure)(struct device *dev);
+   void (*dma_cleanup)(struct device *dev);
 
const struct dev_pm_ops *pm;
 
diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index 3fc3b5940bb3..94b7ac9bf459 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -671,6 +671,8 @@ static int really_probe(struct device *dev, struct 
device_driver *drv)
if (dev->bus)
		blocking_notifier_call_chain(&dev->bus->p->bus_notifier,
					     BUS_NOTIFY_DRIVER_NOT_BOUND, dev);
+   if (dev->bus && dev->bus->dma_cleanup)
+   dev->bus->dma_cleanup(dev);
 pinctrl_bind_failed:
device_links_no_driver(dev);
device_unbind_cleanup(dev);
@@ -1199,6 +1201,9 @@ static void __device_release_driver(struct device *dev, 
struct device *parent)
 
device_remove(dev);
 
+   if (dev->bus && dev->bus->dma_cleanup)
+   dev->bus->dma_cleanup(dev);
+
device_links_driver_cleanup(dev);
device_unbind_cleanup(dev);
 
-- 
2.25.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[RESEND PATCH v8 00/11] Fix BUG_ON in vfio_iommu_group_notifier()

2022-04-17 Thread Lu Baolu
Hi Joerg,

This is a resend version of v8 posted here:
https://lore.kernel.org/linux-iommu/20220308054421.847385-1-baolu...@linux.intel.com/
as we discussed in this thread:
https://lore.kernel.org/linux-iommu/yk%2fq1bgn8pc5h...@8bytes.org/

All patches can be applied perfectly except this one:
 - [PATCH v8 02/11] driver core: Add dma_cleanup callback in bus_type
It conflicts with below refactoring commit:
 - 4b775aaf1ea99 "driver core: Refactor sysfs and drv/bus remove hooks"
The conflict has been fixed in this post.

No functional changes in this series. I suppress cc-ing this series to
all v8 reviewers in order to avoid spam.

Please consider it for your iommu tree.

Best regards,
baolu

Change log:
- v8 and before:
  - Please refer to v8 post for all the change history.

- v8-resend
  - Rebase the series on top of v5.18-rc3.
  - Add Reviewed-by's granted by Robin.
  - Add a Tested-by granted by Eric.

Jason Gunthorpe (1):
  vfio: Delete the unbound_list

Lu Baolu (10):
  iommu: Add DMA ownership management interfaces
  driver core: Add dma_cleanup callback in bus_type
  amba: Stop sharing platform_dma_configure()
  bus: platform,amba,fsl-mc,PCI: Add device DMA ownership management
  PCI: pci_stub: Set driver_managed_dma
  PCI: portdrv: Set driver_managed_dma
  vfio: Set DMA ownership for VFIO devices
  vfio: Remove use of vfio_group_viable()
  vfio: Remove iommu group notifier
  iommu: Remove iommu group changes notifier

 include/linux/amba/bus.h  |   8 +
 include/linux/device/bus.h|   3 +
 include/linux/fsl/mc.h|   8 +
 include/linux/iommu.h |  54 +++---
 include/linux/pci.h   |   8 +
 include/linux/platform_device.h   |  10 +-
 drivers/amba/bus.c|  37 +++-
 drivers/base/dd.c |   5 +
 drivers/base/platform.c   |  21 ++-
 drivers/bus/fsl-mc/fsl-mc-bus.c   |  24 ++-
 drivers/iommu/iommu.c | 228 
 drivers/pci/pci-driver.c  |  18 ++
 drivers/pci/pci-stub.c|   1 +
 drivers/pci/pcie/portdrv_pci.c|   2 +
 drivers/vfio/fsl-mc/vfio_fsl_mc.c |   1 +
 drivers/vfio/pci/vfio_pci.c   |   1 +
 drivers/vfio/platform/vfio_amba.c |   1 +
 drivers/vfio/platform/vfio_platform.c |   1 +
 drivers/vfio/vfio.c   | 245 ++
 19 files changed, 338 insertions(+), 338 deletions(-)

-- 
2.25.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[RESEND PATCH v8 01/11] iommu: Add DMA ownership management interfaces

2022-04-17 Thread Lu Baolu
Multiple devices may be placed in the same IOMMU group because they
cannot be isolated from each other. These devices must either be
entirely under kernel control or userspace control, never a mixture.

This adds dma ownership management in iommu core and exposes several
interfaces for the device drivers and the device userspace assignment
framework (i.e. VFIO), so that any conflict between user and kernel
controlled dma could be detected at the beginning.

The device driver oriented interfaces are,

int iommu_device_use_default_domain(struct device *dev);
void iommu_device_unuse_default_domain(struct device *dev);

By calling iommu_device_use_default_domain(), the device driver tells
the iommu layer that the device dma is handled through the kernel DMA
APIs. The iommu layer will manage the IOVA and use the default domain
for DMA address translation.

The device user-space assignment framework oriented interfaces are,

int iommu_group_claim_dma_owner(struct iommu_group *group,
void *owner);
void iommu_group_release_dma_owner(struct iommu_group *group);
bool iommu_group_dma_owner_claimed(struct iommu_group *group);

The device userspace assignment must be disallowed if the DMA owner
claiming interface returns failure.

Signed-off-by: Jason Gunthorpe 
Signed-off-by: Kevin Tian 
Signed-off-by: Lu Baolu 
Reviewed-by: Robin Murphy 
---
 include/linux/iommu.h |  31 +
 drivers/iommu/iommu.c | 153 +-
 2 files changed, 181 insertions(+), 3 deletions(-)

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 9208eca4b0d1..77972ef978b5 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -675,6 +675,13 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev,
 void iommu_sva_unbind_device(struct iommu_sva *handle);
 u32 iommu_sva_get_pasid(struct iommu_sva *handle);
 
+int iommu_device_use_default_domain(struct device *dev);
+void iommu_device_unuse_default_domain(struct device *dev);
+
+int iommu_group_claim_dma_owner(struct iommu_group *group, void *owner);
+void iommu_group_release_dma_owner(struct iommu_group *group);
+bool iommu_group_dma_owner_claimed(struct iommu_group *group);
+
 #else /* CONFIG_IOMMU_API */
 
 struct iommu_ops {};
@@ -1031,6 +1038,30 @@ static inline struct iommu_fwspec 
*dev_iommu_fwspec_get(struct device *dev)
 {
return NULL;
 }
+
+static inline int iommu_device_use_default_domain(struct device *dev)
+{
+   return 0;
+}
+
+static inline void iommu_device_unuse_default_domain(struct device *dev)
+{
+}
+
+static inline int
+iommu_group_claim_dma_owner(struct iommu_group *group, void *owner)
+{
+   return -ENODEV;
+}
+
+static inline void iommu_group_release_dma_owner(struct iommu_group *group)
+{
+}
+
+static inline bool iommu_group_dma_owner_claimed(struct iommu_group *group)
+{
+   return false;
+}
 #endif /* CONFIG_IOMMU_API */
 
 /**
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index f2c45b85b9fc..eba8e8ccf19d 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -48,6 +48,8 @@ struct iommu_group {
struct iommu_domain *default_domain;
struct iommu_domain *domain;
struct list_head entry;
+   unsigned int owner_cnt;
+   void *owner;
 };
 
 struct group_device {
@@ -294,7 +296,11 @@ int iommu_probe_device(struct device *dev)
	mutex_lock(&group->mutex);
iommu_alloc_default_domain(group, dev);
 
-   if (group->default_domain) {
+   /*
+* If device joined an existing group which has been claimed, don't
+* attach the default domain.
+*/
+   if (group->default_domain && !group->owner) {
ret = __iommu_attach_device(group->default_domain, dev);
if (ret) {
			mutex_unlock(&group->mutex);
@@ -2109,7 +2115,7 @@ static int __iommu_attach_group(struct iommu_domain 
*domain,
 {
int ret;
 
-   if (group->default_domain && group->domain != group->default_domain)
+   if (group->domain && group->domain != group->default_domain)
return -EBUSY;
 
ret = __iommu_group_for_each_dev(group, domain,
@@ -2146,7 +2152,11 @@ static void __iommu_detach_group(struct iommu_domain 
*domain,
 {
int ret;
 
-   if (!group->default_domain) {
+   /*
+* If the group has been claimed already, do not re-attach the default
+* domain.
+*/
+   if (!group->default_domain || group->owner) {
__iommu_group_for_each_dev(group, domain,
   iommu_group_do_detach_device);
group->domain = NULL;
@@ -3095,3 +3105,140 @@ static ssize_t iommu_group_store_type(struct 
iommu_group *group,
 
return ret;
 }
+
+/**
+ * iommu_device_use_default_domain() - Device driver wants to handle device
+ * DMA through the kernel DMA API.
+ * @dev: 

Re: [PATCH 03/25] dma-direct: take dma-ranges/offsets into account in resource mapping

2022-04-17 Thread Serge Semin
Hello Robin.

Sorry for the delayed answer. My comments are below.

On Thu, Mar 24, 2022 at 11:30:38AM +, Robin Murphy wrote:
> On 2022-03-24 01:48, Serge Semin wrote:
> > A basic device-specific linear memory mapping was introduced back in
> > commit ("dma: Take into account dma_pfn_offset") as a single-valued offset
> > preserved in the device.dma_pfn_offset field, which was initialized for
> > instance by means of the "dma-ranges" DT property. Afterwards the
> > functionality was extended to support more than one device-specific region
> > defined in the device.dma_range_map list of maps. But all of these
> > improvements concerned a single pointer, page or sg DMA-mapping methods,
> > while the system resource mapping function turned to miss the
> > corresponding modification. Thus the dma_direct_map_resource() method now
> > just casts the CPU physical address to the device DMA address with no
> > dma-ranges-based mapping taking into account, which is obviously wrong.
> > Let's fix it by using the phys_to_dma_direct() method to get the
> > device-specific bus address from the passed memory resource for the case
> > of the directly mapped DMA.
> 

> It may not have been well-documented at the time, but this was largely
> intentional. The assumption based on known systems was that where
> dma_pfn_offset existed, it would *not* apply to peer MMIO addresses.

Well, I'd say it wasn't documented or even discussed at all. At least
after a pretty much comprehensive retrospective research I failed to
find any note about the reason of having all the dma_direct_map*()
methods converted to supporting the dma_pfn_offset/dma_range_map
ranges, but leaving the dma_direct_map_resource() method out of that
conversion. Neither it is immediately inferable from the method usage
and its prototype that it is supposed to be utilized for the DMA
memory addresses, not the CPU one.

> 
> For instance, DTs for TI Keystone 2 platforms only describe an offset for
> RAM:
> 
>   dma-ranges = <0x8000 0x8 0x 0x8000>;
> 
> but a DMA controller might also want to access something in the MMIO range
> 0x0-0x7fff, of which it still has an identical non-offset view. If a
> driver was previously using dma_map_resource() for that, it would now start
> getting DMA_MAPPING_ERROR because the dma_range_map exists but doesn't
> describe the MMIO region. I agree that in hindsight it's not an ideal
> situation, but it's how things have ended up, so at this point I'm wary of
> making potentially-breaking changes.

Hmm, what if the driver was previously using for instance the
dma_direct_map_sg() method for it? Following this logic you would have
needed to decline the whole dma_pfn_offset/dma_range_map ranges
support, since the dma_direct_map_sg(), dma_direct_map_page(),
dma_direct_alloc*() methods do take the offsets into account. What we
can see now is that the same physical address will be differently
mapped by the dma_map_resource() and, for instance, dma_map_sg()
methods. All of these methods expect to have the "phys_addr_t" address
passed, which is the CPU address, not the DMA one. Doesn't that look
erroneous? IIUC in accordance with the common kernel definition the
"resource" suffix indicates the CPU-visible address (like struct
resource range), not the DMA address. No matter whether it's used to
describe the RAM or MMIO range.

AFAICS the dma_range_map just defines the offset-based DMA-to-CPU
mapping for the particular bus/device. If the device driver already
knows the DMA address why does it need to map it at all? I see some
contradiction here.

> 
> May I ask what exactly your setup looks like, if you have a DMA controller
> with an offset view of its "own" MMIO space?

I don't have such. But what I see here is either the wrong
dma_direct_map_resource() implementation or a redundant mapping
performed in some platforms/DMA-device drivers. Indeed judging by the
dma_map_resource() method declaration it expects to have the
CPU-address passed, which will be mapped in accordance with the
"dma-ranges"-based DMA-to-CPU memory mapping in the same way as the
rest of the dma_direct-family methods. If DMA address is already known
then it is supposed to be used as-is with no any additional remapping
procedure performed.

The last but not least regarding the DMA controllers and the
dma_map_resource() usage. The dma_slave_config structure was converted
to having the CPU-physical src/dst address specified in commit
9575632052ba ("dmaengine: make slave address physical"). So the DMA
client drivers now have to set the slave source and destination
addresses defined in the CPU address space, while the DMA engine
driver needs to map it in accordance with the platform/device specific
configs.

To sum up as I see it the problem is in the dma_map_resource()
semantics still exist. The semantic isn't documented in any way while
its implementation looks confusing. You say that the method
expects to have the DMA address passed, but at the 

Re: [PATCH RFC 07/12] iommufd: Data structure to provide IOVA to PFN mapping

2022-04-17 Thread Yi Liu

On 2022/4/13 22:49, Yi Liu wrote:

On 2022/4/13 22:36, Jason Gunthorpe wrote:

On Wed, Apr 13, 2022 at 10:02:58PM +0800, Yi Liu wrote:

+/**
+ * iopt_unmap_iova() - Remove a range of iova
+ * @iopt: io_pagetable to act on
+ * @iova: Starting iova to unmap
+ * @length: Number of bytes to unmap
+ *
+ * The requested range must exactly match an existing range.
+ * Splitting/truncating IOVA mappings is not allowed.
+ */
+int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova,
+    unsigned long length)
+{
+    struct iopt_pages *pages;
+    struct iopt_area *area;
+    unsigned long iova_end;
+    int rc;
+
+    if (!length)
+    return -EINVAL;
+
+    if (check_add_overflow(iova, length - 1, &iova_end))
+    return -EOVERFLOW;
+
+    down_read(&iopt->domains_rwsem);
+    down_write(&iopt->iova_rwsem);
+    area = iopt_find_exact_area(iopt, iova, iova_end);


when testing vIOMMU with Qemu using iommufd, I hit a problem as log #3
shows. Qemu failed when trying to do map due to an IOVA still in use.
After debugging, the 0xf000 IOVA is mapped but not unmapped. But per 
log

#2, Qemu has issued unmap with a larger range (0xff00 -
0x1) which includes the 0xf000. But iopt_find_exact_area()
doesn't find any area. So 0xf000 is not unmapped. Is this correct? Same
test passed with vfio iommu type1 driver. any idea?


There are a couple of good reasons why the iopt_unmap_iova() should
proccess any contiguous range of fully contained areas, so I would
consider this something worth fixing. can you send a small patch and
test case and I'll fold it in?


sure. just spotted it, so haven't got fix patch yet. I may work on
it tomorrow.


Hi Jason,

Got below patch for it. Also pushed to the exploration branch.

https://github.com/luxis1999/iommufd/commit/d764f3288de0fd52c578684788a437701ec31b2d

From 22a758c401a1c7f6656625013bb87204c9ea65fe Mon Sep 17 00:00:00 2001
From: Yi Liu 
Date: Sun, 17 Apr 2022 07:39:03 -0700
Subject: [PATCH] iommufd/io_pagetable: Support unmap fully contained areas

Changes:
- return the unmapped bytes to caller
- supports unmap of fully contained contiguous areas
- add a test case in selftest

Signed-off-by: Yi Liu 
---
 drivers/iommu/iommufd/io_pagetable.c| 90 -
 drivers/iommu/iommufd/ioas.c|  8 ++-
 drivers/iommu/iommufd/iommufd_private.h |  4 +-
 drivers/iommu/iommufd/vfio_compat.c |  8 ++-
 include/uapi/linux/iommufd.h|  2 +-
 tools/testing/selftests/iommu/iommufd.c | 40 +++
 6 files changed, 99 insertions(+), 53 deletions(-)

diff --git a/drivers/iommu/iommufd/io_pagetable.c 
b/drivers/iommu/iommufd/io_pagetable.c

index f9f3b06946bf..5142f797a812 100644
--- a/drivers/iommu/iommufd/io_pagetable.c
+++ b/drivers/iommu/iommufd/io_pagetable.c
@@ -315,61 +315,26 @@ static int __iopt_unmap_iova(struct io_pagetable 
*iopt, struct iopt_area *area,

return 0;
 }

-/**
- * iopt_unmap_iova() - Remove a range of iova
- * @iopt: io_pagetable to act on
- * @iova: Starting iova to unmap
- * @length: Number of bytes to unmap
- *
- * The requested range must exactly match an existing range.
- * Splitting/truncating IOVA mappings is not allowed.
- */
-int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova,
-   unsigned long length)
-{
-   struct iopt_pages *pages;
-   struct iopt_area *area;
-   unsigned long iova_end;
-   int rc;
-
-   if (!length)
-   return -EINVAL;
-
-   if (check_add_overflow(iova, length - 1, &iova_end))
-   return -EOVERFLOW;
-
-   down_read(&iopt->domains_rwsem);
-   down_write(&iopt->iova_rwsem);
-   area = iopt_find_exact_area(iopt, iova, iova_end);
-   if (!area) {
-   up_write(&iopt->iova_rwsem);
-   up_read(&iopt->domains_rwsem);
-   return -ENOENT;
-   }
-   pages = area->pages;
-   area->pages = NULL;
-   up_write(&iopt->iova_rwsem);
-
-   rc = __iopt_unmap_iova(iopt, area, pages);
-   up_read(&iopt->domains_rwsem);
-   return rc;
-}
-
-int iopt_unmap_all(struct io_pagetable *iopt)
+static int __iopt_unmap_iova_range(struct io_pagetable *iopt,
+  unsigned long start,
+  unsigned long end,
+  unsigned long *unmapped)
 {
struct iopt_area *area;
+   unsigned long unmapped_bytes = 0;
int rc;

	down_read(&iopt->domains_rwsem);
	down_write(&iopt->iova_rwsem);
-   while ((area = iopt_area_iter_first(iopt, 0, ULONG_MAX))) {
+   while ((area = iopt_area_iter_first(iopt, start, end))) {
struct iopt_pages *pages;

-   /* Userspace should not race unmap all and map */
-   if (!area->pages) {
-   rc = -EBUSY;
+   if (!area->pages || iopt_area_iova(area) < start ||
+   iopt_area_last_iova(area) > end) {
+   rc = -ENOENT;
goto 

[Patch v1] iommu: arm-smmu: disable large page mappings for Nvidia arm-smmu

2022-04-17 Thread Ashish Mhetre via iommu
Tegra194 and Tegra234 SoCs have the erratum that causes walk cache
entries to not be invalidated correctly. The problem is that the walk
cache index generated for IOVA is not same across translation and
invalidation requests. This is leading to page faults when PMD entry is
released during unmap and populated with new PTE table during subsequent
map request. Disabling large page mappings avoids the release of PMD
entry and avoids translations seeing stale PMD entry in walk cache.
Fix this by limiting the page mappings to PAGE_SIZE for Tegra194 and
Tegra234 devices. This is recommended fix from Tegra hardware design
team.

Co-developed-by: Pritesh Raithatha 
Signed-off-by: Pritesh Raithatha 
Signed-off-by: Ashish Mhetre 
---
 drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c | 23 
 drivers/iommu/arm/arm-smmu/arm-smmu.c|  3 +++
 drivers/iommu/arm/arm-smmu/arm-smmu.h|  1 +
 3 files changed, 27 insertions(+)

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c 
b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
index 01e9b50b10a1..b7a3d06da2f4 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
@@ -258,6 +258,27 @@ static void nvidia_smmu_probe_finalize(struct 
arm_smmu_device *smmu, struct devi
dev_name(dev), err);
 }
 
+static void nvidia_smmu_cfg_pgsize_bitmap(struct arm_smmu_device *smmu)
+{
+   const struct device_node *np = smmu->dev->of_node;
+
+   /*
+* Tegra194 and Tegra234 SoCs have the erratum that causes walk cache
+* entries to not be invalidated correctly. The problem is that the walk
+* cache index generated for IOVA is not same across translation and
+* invalidation requests. This is leading to page faults when PMD entry
+* is released during unmap and populated with new PTE table during
+* subsequent map request. Disabling large page mappings avoids the
+* release of PMD entry and avoids translations seeing stale PMD entry in
+* walk cache.
+* Fix this by limiting the page mappings to PAGE_SIZE on Tegra194 and
+* Tegra234.
+*/
+   if (of_device_is_compatible(np, "nvidia,tegra234-smmu") ||
+   of_device_is_compatible(np, "nvidia,tegra194-smmu"))
+   smmu->pgsize_bitmap = PAGE_SIZE;
+}
+
 static const struct arm_smmu_impl nvidia_smmu_impl = {
.read_reg = nvidia_smmu_read_reg,
.write_reg = nvidia_smmu_write_reg,
@@ -268,10 +289,12 @@ static const struct arm_smmu_impl nvidia_smmu_impl = {
.global_fault = nvidia_smmu_global_fault,
.context_fault = nvidia_smmu_context_fault,
.probe_finalize = nvidia_smmu_probe_finalize,
+   .cfg_pgsize_bitmap = nvidia_smmu_cfg_pgsize_bitmap,
 };
 
 static const struct arm_smmu_impl nvidia_smmu_single_impl = {
.probe_finalize = nvidia_smmu_probe_finalize,
+   .cfg_pgsize_bitmap = nvidia_smmu_cfg_pgsize_bitmap,
 };
 
 struct arm_smmu_device *nvidia_smmu_impl_init(struct arm_smmu_device *smmu)
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c 
b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index 568cce590ccc..3692a19a588a 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -1872,6 +1872,9 @@ static int arm_smmu_device_cfg_probe(struct 
arm_smmu_device *smmu)
if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
 
+   if (smmu->impl && smmu->impl->cfg_pgsize_bitmap)
+   smmu->impl->cfg_pgsize_bitmap(smmu);
+
if (arm_smmu_ops.pgsize_bitmap == -1UL)
arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
else
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h 
b/drivers/iommu/arm/arm-smmu/arm-smmu.h
index 2b9b42fb6f30..5d9b03024969 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
@@ -442,6 +442,7 @@ struct arm_smmu_impl {
void (*write_s2cr)(struct arm_smmu_device *smmu, int idx);
void (*write_sctlr)(struct arm_smmu_device *smmu, int idx, u32 reg);
void (*probe_finalize)(struct arm_smmu_device *smmu, struct device 
*dev);
+   void (*cfg_pgsize_bitmap)(struct arm_smmu_device *smmu);
 };
 
 #define INVALID_SMENDX -1
-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu