On Mon, 6 Mar 2023 19:39:15 +0000 Joao Martins <joao.m.mart...@oracle.com> wrote:
> On 06/03/2023 18:42, Alex Williamson wrote: > > On Sat, 4 Mar 2023 01:43:38 +0000 > > Joao Martins <joao.m.mart...@oracle.com> wrote: > > > >> Add device dirty page tracking start/stop functionality. This uses the > >> device DMA logging uAPI to start and stop dirty page tracking by device. > >> > >> Device dirty page tracking is used only if all devices within a > >> container support device dirty page tracking. > >> > >> Signed-off-by: Avihai Horon <avih...@nvidia.com> > >> Signed-off-by: Joao Martins <joao.m.mart...@oracle.com> > >> --- > >> hw/vfio/common.c | 166 +++++++++++++++++++++++++++++++++- > >> hw/vfio/trace-events | 1 + > >> include/hw/vfio/vfio-common.h | 2 + > >> 3 files changed, 166 insertions(+), 3 deletions(-) > >> > >> diff --git a/hw/vfio/common.c b/hw/vfio/common.c > >> index d84e5fd86bb4..aa0df0604704 100644 > >> --- a/hw/vfio/common.c > >> +++ b/hw/vfio/common.c > >> @@ -453,6 +453,22 @@ static bool > >> vfio_devices_all_dirty_tracking(VFIOContainer *container) > >> return true; > >> } > >> > >> +static bool vfio_devices_all_device_dirty_tracking(VFIOContainer > >> *container) > >> +{ > >> + VFIOGroup *group; > >> + VFIODevice *vbasedev; > >> + > >> + QLIST_FOREACH(group, &container->group_list, container_next) { > >> + QLIST_FOREACH(vbasedev, &group->device_list, next) { > >> + if (!vbasedev->dirty_pages_supported) { > >> + return false; > >> + } > >> + } > >> + } > >> + > >> + return true; > >> +} > >> + > >> /* > >> * Check if all VFIO devices are running and migration is active, which is > >> * essentially equivalent to the migration being in pre-copy phase. > >> @@ -1395,15 +1411,152 @@ static void > >> vfio_dirty_tracking_init(VFIOContainer *container) > >> qemu_mutex_destroy(&container->tracking_mutex); > >> } > >> > >> +static int vfio_devices_dma_logging_set(VFIOContainer *container, > >> + struct vfio_device_feature > >> *feature) > >> +{ > >> + bool status = (feature->flags & VFIO_DEVICE_FEATURE_MASK) == > >> + VFIO_DEVICE_FEATURE_DMA_LOGGING_START; > >> + VFIODevice *vbasedev; > >> + VFIOGroup *group; > >> + int ret = 0; > >> + > >> + QLIST_FOREACH(group, &container->group_list, container_next) { > >> + QLIST_FOREACH(vbasedev, &group->device_list, next) { > >> + if (vbasedev->dirty_tracking == status) { > >> + continue; > >> + } > >> + > >> + ret = ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature); > >> + if (ret) { > >> + ret = -errno; > >> + error_report("%s: Failed to set DMA logging %s, err %d > >> (%s)", > >> + vbasedev->name, status ? "start" : "stop", > >> ret, > >> + strerror(errno)); > >> + goto out; > >> + } > > > > Exiting and returning an error on the first failure when starting dirty > > tracking makes sense. Does that behavior still make sense for the stop > > path? Maybe since we only support a single device it doesn't really > > matter, but this needs to be revisited for multiple devices. Thanks, > > > > How about I test for @status and exit earlier based on that? > (maybe variable should be renamed too) e.g. > > if (ret) { > ret = -errno; > error_report("%s: Failed to set DMA logging %s, err %d (%s)", > vbasedev->name, status ? "start" : "stop", ret, > strerror(errno)) > if (status) { > goto out; > } > } Yep, exit on first error enabling, continue on disabling makes more sense, but then we need to look at what return code makes sense for the teardown. TBH, a teardown function would typically return void, so it's possible we'd be better off not using this for both. Thanks, Alex PS - no further original comments on v3 from me. > >> + vbasedev->dirty_tracking = status; > >> + } > >> + } > >> + > >> +out: > >> + return ret; > >> +} > >> + > >> +static int vfio_devices_dma_logging_stop(VFIOContainer *container) > >> +{ > >> + uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature), > >> + sizeof(uint64_t))] = {}; > >> + struct vfio_device_feature *feature = (struct vfio_device_feature > >> *)buf; > >> + > >> + feature->argsz = sizeof(buf); > >> + feature->flags = VFIO_DEVICE_FEATURE_SET; > >> + feature->flags |= VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP; > >> + > >> + return vfio_devices_dma_logging_set(container, feature); > >> +} > >> + > >> +static struct vfio_device_feature * > >> +vfio_device_feature_dma_logging_start_create(VFIOContainer *container) > >> +{ > >> + struct vfio_device_feature *feature; > >> + size_t feature_size; > >> + struct vfio_device_feature_dma_logging_control *control; > >> + struct vfio_device_feature_dma_logging_range *ranges; > >> + VFIODirtyTrackingRange *tracking = &container->tracking_range; > >> + > >> + feature_size = sizeof(struct vfio_device_feature) + > >> + sizeof(struct vfio_device_feature_dma_logging_control); > >> + feature = g_try_malloc0(feature_size); > >> + if (!feature) { > >> + errno = ENOMEM; > >> + return NULL; > >> + } > >> + feature->argsz = feature_size; > >> + feature->flags = VFIO_DEVICE_FEATURE_SET; > >> + feature->flags |= VFIO_DEVICE_FEATURE_DMA_LOGGING_START; > >> + > >> + control = (struct vfio_device_feature_dma_logging_control > >> *)feature->data; > >> + control->page_size = qemu_real_host_page_size(); > >> + > >> + /* > >> + * DMA logging uAPI guarantees to support at least a number of ranges > >> that > >> + * fits into a single host kernel base page. > >> + */ > >> + control->num_ranges = !!tracking->max32 + !!tracking->max64; > >> + ranges = g_try_new0(struct vfio_device_feature_dma_logging_range, > >> + control->num_ranges); > >> + if (!ranges) { > >> + g_free(feature); > >> + errno = ENOMEM; > >> + > >> + return NULL; > >> + } > >> + > >> + control->ranges = (__aligned_u64)ranges; > >> + if (tracking->max32) { > >> + ranges->iova = tracking->min32; > >> + ranges->length = (tracking->max32 - tracking->min32) + 1; > >> + ranges++; > >> + } > >> + if (tracking->max64) { > >> + ranges->iova = tracking->min64; > >> + ranges->length = (tracking->max64 - tracking->min64) + 1; > >> + } > >> + > >> + trace_vfio_device_dirty_tracking_start(control->num_ranges, > >> + tracking->min32, > >> tracking->max32, > >> + tracking->min64, > >> tracking->max64); > >> + > >> + return feature; > >> +} > >> + > >> +static void vfio_device_feature_dma_logging_start_destroy( > >> + struct vfio_device_feature *feature) > >> +{ > >> + struct vfio_device_feature_dma_logging_control *control = > >> + (struct vfio_device_feature_dma_logging_control *)feature->data; > >> + struct vfio_device_feature_dma_logging_range *ranges = > >> + (struct vfio_device_feature_dma_logging_range *)control->ranges; > >> + > >> + g_free(ranges); > >> + g_free(feature); > >> +} > >> + > >> +static int vfio_devices_dma_logging_start(VFIOContainer *container) > >> +{ > >> + struct vfio_device_feature *feature; > >> + int ret = 0; > >> + > >> + vfio_dirty_tracking_init(container); > >> + feature = vfio_device_feature_dma_logging_start_create(container); > >> + if (!feature) { > >> + return -errno; > >> + } > >> + > >> + ret = vfio_devices_dma_logging_set(container, feature); > >> + if (ret) { > >> + vfio_devices_dma_logging_stop(container); > >> + } > >> + > >> + vfio_device_feature_dma_logging_start_destroy(feature); > >> + > >> + return ret; > >> +} > >> + > >> static void vfio_listener_log_global_start(MemoryListener *listener) > >> { > >> VFIOContainer *container = container_of(listener, VFIOContainer, > >> listener); > >> int ret; > >> > >> - vfio_dirty_tracking_init(container); > >> + if (vfio_devices_all_device_dirty_tracking(container)) { > >> + ret = vfio_devices_dma_logging_start(container); > >> + } else { > >> + ret = vfio_set_dirty_page_tracking(container, true); > >> + } > >> > >> - ret = vfio_set_dirty_page_tracking(container, true); > >> if (ret) { > >> + error_report("vfio: Could not start dirty page tracking, err: %d > >> (%s)", > >> + ret, strerror(-ret)); > >> vfio_set_migration_error(ret); > >> } > >> } > >> @@ -1413,8 +1566,15 @@ static void > >> vfio_listener_log_global_stop(MemoryListener *listener) > >> VFIOContainer *container = container_of(listener, VFIOContainer, > >> listener); > >> int ret; > >> > >> - ret = vfio_set_dirty_page_tracking(container, false); > >> + if (vfio_devices_all_device_dirty_tracking(container)) { > >> + ret = vfio_devices_dma_logging_stop(container); > >> + } else { > >> + ret = vfio_set_dirty_page_tracking(container, false); > >> + } > >> + > >> if (ret) { > >> + error_report("vfio: Could not stop dirty page tracking, err: %d > >> (%s)", > >> + ret, strerror(-ret)); > >> vfio_set_migration_error(ret); > >> } > >> } > >> diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events > >> index d97a6de17921..7a7e0cfe5b23 100644 > >> --- a/hw/vfio/trace-events > >> +++ b/hw/vfio/trace-events > >> @@ -105,6 +105,7 @@ vfio_listener_region_add_no_dma_map(const char *name, > >> uint64_t iova, uint64_t si > >> vfio_listener_region_del_skip(uint64_t start, uint64_t end) "SKIPPING > >> region_del 0x%"PRIx64" - 0x%"PRIx64 > >> vfio_listener_region_del(uint64_t start, uint64_t end) "region_del > >> 0x%"PRIx64" - 0x%"PRIx64 > >> vfio_device_dirty_tracking_update(uint64_t start, uint64_t end, uint64_t > >> min, uint64_t max) "section 0x%"PRIx64" - 0x%"PRIx64" -> update > >> [0x%"PRIx64" - 0x%"PRIx64"]" > >> +vfio_device_dirty_tracking_start(int nr_ranges, uint64_t min32, uint64_t > >> max32, uint64_t min64, uint64_t max64) "nr_ranges %d 32:[0x%"PRIx64" - > >> 0x%"PRIx64"], 64:[0x%"PRIx64" - 0x%"PRIx64"]" > >> vfio_disconnect_container(int fd) "close container->fd=%d" > >> vfio_put_group(int fd) "close group->fd=%d" > >> vfio_get_device(const char * name, unsigned int flags, unsigned int > >> num_regions, unsigned int num_irqs) "Device %s flags: %u, regions: %u, > >> irqs: %u" > >> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h > >> index 96791add2719..1cbbccd91e11 100644 > >> --- a/include/hw/vfio/vfio-common.h > >> +++ b/include/hw/vfio/vfio-common.h > >> @@ -154,6 +154,8 @@ typedef struct VFIODevice { > >> VFIOMigration *migration; > >> Error *migration_blocker; > >> OnOffAuto pre_copy_dirty_page_tracking; > >> + bool dirty_pages_supported; > >> + bool dirty_tracking; > >> } VFIODevice; > >> > >> struct VFIODeviceOps { > > >