According to the device DMA logging uAPI, IOVA ranges to be logged by the device must be provided all at once upon DMA logging start.
As preparation for the following patches which will add device dirty page tracking, keep a record of all DMA mapped IOVA ranges so later they can be used for DMA logging start. Note that when vIOMMU is enabled DMA mapped IOVA ranges are not tracked. This is due to the dynamic nature of vIOMMU DMA mapping/unmapping. Signed-off-by: Avihai Horon <avih...@nvidia.com> Signed-off-by: Joao Martins <joao.m.mart...@oracle.com> --- hw/vfio/common.c | 84 +++++++++++++++++++++++++++++++++++ hw/vfio/trace-events | 1 + include/hw/vfio/vfio-common.h | 11 +++++ 3 files changed, 96 insertions(+) diff --git a/hw/vfio/common.c b/hw/vfio/common.c index ed908e303dbb..d84e5fd86bb4 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -44,6 +44,7 @@ #include "migration/blocker.h" #include "migration/qemu-file.h" #include "sysemu/tpm.h" +#include "qemu/iova-tree.h" VFIOGroupList vfio_group_list = QLIST_HEAD_INITIALIZER(vfio_group_list); @@ -1313,11 +1314,94 @@ static int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start) return ret; } +/* + * Called for the dirty tracking memory listener to calculate the iova/end + * for a given memory region section. The checks here, replicate the logic + * in vfio_listener_region_{add,del}() used for the same purpose. And thus + * both listener should be kept in sync. + */ +static bool vfio_get_section_iova_range(VFIOContainer *container, + MemoryRegionSection *section, + hwaddr *out_iova, hwaddr *out_end) +{ + Int128 llend; + hwaddr iova; + + iova = REAL_HOST_PAGE_ALIGN(section->offset_within_address_space); + llend = int128_make64(section->offset_within_address_space); + llend = int128_add(llend, section->size); + llend = int128_and(llend, int128_exts64(qemu_real_host_page_mask())); + + if (int128_ge(int128_make64(iova), llend)) { + return false; + } + + *out_iova = iova; + *out_end = int128_get64(llend) - 1; + return true; +} + +static void vfio_dirty_tracking_update(MemoryListener *listener, + MemoryRegionSection *section) +{ + VFIOContainer *container = container_of(listener, VFIOContainer, + tracking_listener); + VFIODirtyTrackingRange *range = &container->tracking_range; + hwaddr max32 = (1ULL << 32) - 1ULL; + hwaddr iova, end; + + if (!vfio_listener_valid_section(section) || + !vfio_get_section_iova_range(container, section, &iova, &end)) { + return; + } + + WITH_QEMU_LOCK_GUARD(&container->tracking_mutex) { + if (iova < max32 && end <= max32) { + if (range->min32 > iova) { + range->min32 = iova; + } + if (range->max32 < end) { + range->max32 = end; + } + trace_vfio_device_dirty_tracking_update(iova, end, + range->min32, range->max32); + } else { + if (!range->min64 || range->min64 > iova) { + range->min64 = iova; + } + if (range->max64 < end) { + range->max64 = end; + } + trace_vfio_device_dirty_tracking_update(iova, end, + range->min64, range->max64); + } + } + return; +} + +static const MemoryListener vfio_dirty_tracking_listener = { + .name = "vfio-tracking", + .region_add = vfio_dirty_tracking_update, +}; + +static void vfio_dirty_tracking_init(VFIOContainer *container) +{ + memset(&container->tracking_range, 0, sizeof(container->tracking_range)); + qemu_mutex_init(&container->tracking_mutex); + container->tracking_listener = vfio_dirty_tracking_listener; + memory_listener_register(&container->tracking_listener, + container->space->as); + memory_listener_unregister(&container->tracking_listener); + qemu_mutex_destroy(&container->tracking_mutex); +} + static void vfio_listener_log_global_start(MemoryListener *listener) { VFIOContainer *container = container_of(listener, VFIOContainer, listener); int ret; + vfio_dirty_tracking_init(container); + ret = vfio_set_dirty_page_tracking(container, true); if (ret) { vfio_set_migration_error(ret); diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events index 669d9fe07cd9..d97a6de17921 100644 --- a/hw/vfio/trace-events +++ b/hw/vfio/trace-events @@ -104,6 +104,7 @@ vfio_known_safe_misalignment(const char *name, uint64_t iova, uint64_t offset_wi vfio_listener_region_add_no_dma_map(const char *name, uint64_t iova, uint64_t size, uint64_t page_size) "Region \"%s\" 0x%"PRIx64" size=0x%"PRIx64" is not aligned to 0x%"PRIx64" and cannot be mapped for DMA" vfio_listener_region_del_skip(uint64_t start, uint64_t end) "SKIPPING region_del 0x%"PRIx64" - 0x%"PRIx64 vfio_listener_region_del(uint64_t start, uint64_t end) "region_del 0x%"PRIx64" - 0x%"PRIx64 +vfio_device_dirty_tracking_update(uint64_t start, uint64_t end, uint64_t min, uint64_t max) "section 0x%"PRIx64" - 0x%"PRIx64" -> update [0x%"PRIx64" - 0x%"PRIx64"]" vfio_disconnect_container(int fd) "close container->fd=%d" vfio_put_group(int fd) "close group->fd=%d" vfio_get_device(const char * name, unsigned int flags, unsigned int num_regions, unsigned int num_irqs) "Device %s flags: %u, regions: %u, irqs: %u" diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index 87524c64a443..96791add2719 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -23,6 +23,7 @@ #include "exec/memory.h" #include "qemu/queue.h" +#include "qemu/iova-tree.h" #include "qemu/notify.h" #include "ui/console.h" #include "hw/display/ramfb.h" @@ -68,6 +69,13 @@ typedef struct VFIOMigration { size_t data_buffer_size; } VFIOMigration; +typedef struct VFIODirtyTrackingRange { + hwaddr min32; + hwaddr max32; + hwaddr min64; + hwaddr max64; +} VFIODirtyTrackingRange; + typedef struct VFIOAddressSpace { AddressSpace *as; QLIST_HEAD(, VFIOContainer) containers; @@ -89,6 +97,9 @@ typedef struct VFIOContainer { uint64_t max_dirty_bitmap_size; unsigned long pgsizes; unsigned int dma_max_mappings; + VFIODirtyTrackingRange tracking_range; + QemuMutex tracking_mutex; + MemoryListener tracking_listener; QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; QLIST_HEAD(, VFIOGroup) group_list; -- 2.17.2