On Sat, 4 Mar 2023 01:43:38 +0000 Joao Martins <joao.m.mart...@oracle.com> wrote:
> Add device dirty page tracking start/stop functionality. This uses the > device DMA logging uAPI to start and stop dirty page tracking by device. > > Device dirty page tracking is used only if all devices within a > container support device dirty page tracking. > > Signed-off-by: Avihai Horon <avih...@nvidia.com> > Signed-off-by: Joao Martins <joao.m.mart...@oracle.com> > --- > hw/vfio/common.c | 166 +++++++++++++++++++++++++++++++++- > hw/vfio/trace-events | 1 + > include/hw/vfio/vfio-common.h | 2 + > 3 files changed, 166 insertions(+), 3 deletions(-) > > diff --git a/hw/vfio/common.c b/hw/vfio/common.c > index d84e5fd86bb4..aa0df0604704 100644 > --- a/hw/vfio/common.c > +++ b/hw/vfio/common.c > @@ -453,6 +453,22 @@ static bool > vfio_devices_all_dirty_tracking(VFIOContainer *container) > return true; > } > > +static bool vfio_devices_all_device_dirty_tracking(VFIOContainer *container) > +{ > + VFIOGroup *group; > + VFIODevice *vbasedev; > + > + QLIST_FOREACH(group, &container->group_list, container_next) { > + QLIST_FOREACH(vbasedev, &group->device_list, next) { > + if (!vbasedev->dirty_pages_supported) { > + return false; > + } > + } > + } > + > + return true; > +} > + > /* > * Check if all VFIO devices are running and migration is active, which is > * essentially equivalent to the migration being in pre-copy phase. > @@ -1395,15 +1411,152 @@ static void vfio_dirty_tracking_init(VFIOContainer > *container) > qemu_mutex_destroy(&container->tracking_mutex); > } > > +static int vfio_devices_dma_logging_set(VFIOContainer *container, > + struct vfio_device_feature *feature) > +{ > + bool status = (feature->flags & VFIO_DEVICE_FEATURE_MASK) == > + VFIO_DEVICE_FEATURE_DMA_LOGGING_START; > + VFIODevice *vbasedev; > + VFIOGroup *group; > + int ret = 0; > + > + QLIST_FOREACH(group, &container->group_list, container_next) { > + QLIST_FOREACH(vbasedev, &group->device_list, next) { > + if (vbasedev->dirty_tracking == status) { > + continue; > + } > + > + ret = ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature); > + if (ret) { > + ret = -errno; > + error_report("%s: Failed to set DMA logging %s, err %d (%s)", > + vbasedev->name, status ? "start" : "stop", ret, > + strerror(errno)); > + goto out; > + } Exiting and returning an error on the first failure when starting dirty tracking makes sense. Does that behavior still make sense for the stop path? Maybe since we only support a single device it doesn't really matter, but this needs to be revisited for multiple devices. Thanks, Alex > + vbasedev->dirty_tracking = status; > + } > + } > + > +out: > + return ret; > +} > + > +static int vfio_devices_dma_logging_stop(VFIOContainer *container) > +{ > + uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature), > + sizeof(uint64_t))] = {}; > + struct vfio_device_feature *feature = (struct vfio_device_feature *)buf; > + > + feature->argsz = sizeof(buf); > + feature->flags = VFIO_DEVICE_FEATURE_SET; > + feature->flags |= VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP; > + > + return vfio_devices_dma_logging_set(container, feature); > +} > + > +static struct vfio_device_feature * > +vfio_device_feature_dma_logging_start_create(VFIOContainer *container) > +{ > + struct vfio_device_feature *feature; > + size_t feature_size; > + struct vfio_device_feature_dma_logging_control *control; > + struct vfio_device_feature_dma_logging_range *ranges; > + VFIODirtyTrackingRange *tracking = &container->tracking_range; > + > + feature_size = sizeof(struct vfio_device_feature) + > + sizeof(struct vfio_device_feature_dma_logging_control); > + feature = g_try_malloc0(feature_size); > + if (!feature) { > + errno = ENOMEM; > + return NULL; > + } > + feature->argsz = feature_size; > + feature->flags = VFIO_DEVICE_FEATURE_SET; > + feature->flags |= VFIO_DEVICE_FEATURE_DMA_LOGGING_START; > + > + control = (struct vfio_device_feature_dma_logging_control > *)feature->data; > + control->page_size = qemu_real_host_page_size(); > + > + /* > + * DMA logging uAPI guarantees to support at least a number of ranges > that > + * fits into a single host kernel base page. > + */ > + control->num_ranges = !!tracking->max32 + !!tracking->max64; > + ranges = g_try_new0(struct vfio_device_feature_dma_logging_range, > + control->num_ranges); > + if (!ranges) { > + g_free(feature); > + errno = ENOMEM; > + > + return NULL; > + } > + > + control->ranges = (__aligned_u64)ranges; > + if (tracking->max32) { > + ranges->iova = tracking->min32; > + ranges->length = (tracking->max32 - tracking->min32) + 1; > + ranges++; > + } > + if (tracking->max64) { > + ranges->iova = tracking->min64; > + ranges->length = (tracking->max64 - tracking->min64) + 1; > + } > + > + trace_vfio_device_dirty_tracking_start(control->num_ranges, > + tracking->min32, tracking->max32, > + tracking->min64, tracking->max64); > + > + return feature; > +} > + > +static void vfio_device_feature_dma_logging_start_destroy( > + struct vfio_device_feature *feature) > +{ > + struct vfio_device_feature_dma_logging_control *control = > + (struct vfio_device_feature_dma_logging_control *)feature->data; > + struct vfio_device_feature_dma_logging_range *ranges = > + (struct vfio_device_feature_dma_logging_range *)control->ranges; > + > + g_free(ranges); > + g_free(feature); > +} > + > +static int vfio_devices_dma_logging_start(VFIOContainer *container) > +{ > + struct vfio_device_feature *feature; > + int ret = 0; > + > + vfio_dirty_tracking_init(container); > + feature = vfio_device_feature_dma_logging_start_create(container); > + if (!feature) { > + return -errno; > + } > + > + ret = vfio_devices_dma_logging_set(container, feature); > + if (ret) { > + vfio_devices_dma_logging_stop(container); > + } > + > + vfio_device_feature_dma_logging_start_destroy(feature); > + > + return ret; > +} > + > static void vfio_listener_log_global_start(MemoryListener *listener) > { > VFIOContainer *container = container_of(listener, VFIOContainer, > listener); > int ret; > > - vfio_dirty_tracking_init(container); > + if (vfio_devices_all_device_dirty_tracking(container)) { > + ret = vfio_devices_dma_logging_start(container); > + } else { > + ret = vfio_set_dirty_page_tracking(container, true); > + } > > - ret = vfio_set_dirty_page_tracking(container, true); > if (ret) { > + error_report("vfio: Could not start dirty page tracking, err: %d > (%s)", > + ret, strerror(-ret)); > vfio_set_migration_error(ret); > } > } > @@ -1413,8 +1566,15 @@ static void > vfio_listener_log_global_stop(MemoryListener *listener) > VFIOContainer *container = container_of(listener, VFIOContainer, > listener); > int ret; > > - ret = vfio_set_dirty_page_tracking(container, false); > + if (vfio_devices_all_device_dirty_tracking(container)) { > + ret = vfio_devices_dma_logging_stop(container); > + } else { > + ret = vfio_set_dirty_page_tracking(container, false); > + } > + > if (ret) { > + error_report("vfio: Could not stop dirty page tracking, err: %d > (%s)", > + ret, strerror(-ret)); > vfio_set_migration_error(ret); > } > } > diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events > index d97a6de17921..7a7e0cfe5b23 100644 > --- a/hw/vfio/trace-events > +++ b/hw/vfio/trace-events > @@ -105,6 +105,7 @@ vfio_listener_region_add_no_dma_map(const char *name, > uint64_t iova, uint64_t si > vfio_listener_region_del_skip(uint64_t start, uint64_t end) "SKIPPING > region_del 0x%"PRIx64" - 0x%"PRIx64 > vfio_listener_region_del(uint64_t start, uint64_t end) "region_del > 0x%"PRIx64" - 0x%"PRIx64 > vfio_device_dirty_tracking_update(uint64_t start, uint64_t end, uint64_t > min, uint64_t max) "section 0x%"PRIx64" - 0x%"PRIx64" -> update [0x%"PRIx64" > - 0x%"PRIx64"]" > +vfio_device_dirty_tracking_start(int nr_ranges, uint64_t min32, uint64_t > max32, uint64_t min64, uint64_t max64) "nr_ranges %d 32:[0x%"PRIx64" - > 0x%"PRIx64"], 64:[0x%"PRIx64" - 0x%"PRIx64"]" > vfio_disconnect_container(int fd) "close container->fd=%d" > vfio_put_group(int fd) "close group->fd=%d" > vfio_get_device(const char * name, unsigned int flags, unsigned int > num_regions, unsigned int num_irqs) "Device %s flags: %u, regions: %u, irqs: > %u" > diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h > index 96791add2719..1cbbccd91e11 100644 > --- a/include/hw/vfio/vfio-common.h > +++ b/include/hw/vfio/vfio-common.h > @@ -154,6 +154,8 @@ typedef struct VFIODevice { > VFIOMigration *migration; > Error *migration_blocker; > OnOffAuto pre_copy_dirty_page_tracking; > + bool dirty_pages_supported; > + bool dirty_tracking; > } VFIODevice; > > struct VFIODeviceOps {