Currently, device dirty page tracking with vIOMMU is not supported:
a migration blocker is added, so migration is prevented.

When vIOMMU is used, IOVA ranges are DMA mapped/unmapped on the fly as
requested by the vIOMMU. These IOVA ranges can potentially be mapped
anywhere in the vIOMMU IOVA space as advertised by the VMM.

To support device dirty tracking when a vIOMMU is enabled, create the
dirty ranges based on the vIOMMU-provided limits instead. This leads to
tracking the whole IOVA space, regardless of what the devices use.

Signed-off-by: Avihai Horon <avih...@nvidia.com>
Signed-off-by: Joao Martins <joao.m.mart...@oracle.com>
---
 include/hw/vfio/vfio-common.h |  1 +
 hw/vfio/common.c              | 58 +++++++++++++++++++++++++++++------
 hw/vfio/pci.c                 |  7 +++++
 3 files changed, 56 insertions(+), 10 deletions(-)

diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index f41860988d6b..c4bafad084b4 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -71,6 +71,7 @@ typedef struct VFIOMigration {
 typedef struct VFIOAddressSpace {
     AddressSpace *as;
     bool no_dma_translation;
+    hwaddr max_iova;
     QLIST_HEAD(, VFIOContainer) containers;
     QLIST_ENTRY(VFIOAddressSpace) list;
 } VFIOAddressSpace;
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index ecfb9afb3fb6..85fddef24026 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -428,6 +428,25 @@ static bool vfio_viommu_preset(void)
     return false;
 }
 
+static int vfio_viommu_get_max_iova(hwaddr *max_iova)
+{
+    VFIOAddressSpace *space;
+
+    *max_iova = 0;
+
+    QLIST_FOREACH(space, &vfio_address_spaces, list) {
+        if (space->as == &address_space_memory) {
+            continue;
+        }
+
+        if (*max_iova < space->max_iova) {
+            *max_iova = space->max_iova;
+        }
+    }
+
+    return *max_iova == 0;
+}
+
 int vfio_block_giommu_migration(Error **errp)
 {
     int ret;
@@ -1464,10 +1483,11 @@ static const MemoryListener vfio_dirty_tracking_listener = {
     .region_add = vfio_listener_dirty_tracking_update,
 };
 
-static void vfio_dirty_tracking_init(VFIOContainer *container,
+static int vfio_dirty_tracking_init(VFIOContainer *container,
                                      VFIODirtyRanges *ranges)
 {
     VFIODirtyRangesListener dirty;
+    int ret;
 
     memset(&dirty, 0, sizeof(dirty));
     dirty.ranges.min32 = UINT32_MAX;
@@ -1475,17 +1495,29 @@ static void vfio_dirty_tracking_init(VFIOContainer *container,
     dirty.listener = vfio_dirty_tracking_listener;
     dirty.container = container;
 
-    memory_listener_register(&dirty.listener,
-                             container->space->as);
+    if (vfio_viommu_preset()) {
+        hwaddr iommu_max_iova;
+
+        ret = vfio_viommu_get_max_iova(&iommu_max_iova);
+        if (ret) {
+            return -EINVAL;
+        }
+
+        vfio_dirty_tracking_update(0, iommu_max_iova, &dirty.ranges);
+    } else {
+        memory_listener_register(&dirty.listener,
+                                 container->space->as);
+        /*
+         * The memory listener is synchronous, and used to calculate the range
+         * to dirty tracking. Unregister it after we are done as we are not
+         * interested in any follow-up updates.
+         */
+        memory_listener_unregister(&dirty.listener);
+    }
 
     *ranges = dirty.ranges;
 
-    /*
-     * The memory listener is synchronous, and used to calculate the range
-     * to dirty tracking. Unregister it after we are done as we are not
-     * interested in any follow-up updates.
-     */
-    memory_listener_unregister(&dirty.listener);
+    return 0;
 }
 
 static void vfio_devices_dma_logging_stop(VFIOContainer *container)
@@ -1590,7 +1622,13 @@ static int vfio_devices_dma_logging_start(VFIOContainer *container)
     VFIOGroup *group;
     int ret = 0;
 
-    vfio_dirty_tracking_init(container, &ranges);
+    ret = vfio_dirty_tracking_init(container, &ranges);
+    if (ret) {
+        error_report("Failed to init DMA logging ranges, err %d",
+                      ret);
+        return -EOPNOTSUPP;
+    }
+
     feature = vfio_device_feature_dma_logging_start_create(container,
                                                            &ranges);
     if (!feature) {
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 8a98e6ffc480..3bda5618c5b5 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2974,6 +2974,13 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
                               &dma_translation);
     space->no_dma_translation = !dma_translation;
 
+    /*
+     * Support for advertised IOMMU address space boundaries is optional.
+     * By default, it is not advertised i.e. space::max_iova is 0.
+     */
+    pci_device_iommu_get_attr(pdev, IOMMU_ATTR_MAX_IOVA,
+                              &space->max_iova);
+
     QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
         if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) {
             error_setg(errp, "device is already attached");
-- 
2.17.2


Reply via email to