On 06/03/2023 19:22, Alex Williamson wrote:
> On Sat, 4 Mar 2023 01:43:40 +0000
> Joao Martins <joao.m.mart...@oracle.com> wrote:
>
>> Add device dirty page bitmap sync functionality. This uses the device
>> DMA logging uAPI to sync dirty page bitmap from the device.
>>
>> Device dirty page bitmap sync is used only if all devices within a
>> container support device dirty page tracking.
>>
>> Signed-off-by: Avihai Horon <avih...@nvidia.com>
>> Signed-off-by: Joao Martins <joao.m.mart...@oracle.com>
>> ---
>> hw/vfio/common.c | 88 +++++++++++++++++++++++++++++++++++++++++++-----
>> 1 file changed, 79 insertions(+), 9 deletions(-)
>>
>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>> index b0c7d03279ab..5b8456975e97 100644
>> --- a/hw/vfio/common.c
>> +++ b/hw/vfio/common.c
>> @@ -342,6 +342,9 @@ static int vfio_bitmap_alloc(VFIOBitmap *vbmap, hwaddr
>> size)
>> return 0;
>> }
>>
>> +static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
>> + uint64_t size, ram_addr_t ram_addr);
>> +
>> bool vfio_mig_active(void)
>> {
>> VFIOGroup *group;
>> @@ -565,10 +568,16 @@ static int vfio_dma_unmap(VFIOContainer *container,
>> .iova = iova,
>> .size = size,
>> };
>> + bool need_dirty_sync = false;
>> + int ret;
>> +
>> + if (iotlb && vfio_devices_all_running_and_mig_active(container)) {
>> + if (!vfio_devices_all_device_dirty_tracking(container) &&
>> + container->dirty_pages_supported) {
>> + return vfio_dma_unmap_bitmap(container, iova, size, iotlb);
>> + }
>>
>> - if (iotlb && container->dirty_pages_supported &&
>> - vfio_devices_all_running_and_mig_active(container)) {
>> - return vfio_dma_unmap_bitmap(container, iova, size, iotlb);
>> + need_dirty_sync = true;
>> }
>>
>> while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
>> @@ -594,10 +603,12 @@ static int vfio_dma_unmap(VFIOContainer *container,
>> return -errno;
>> }
>>
>> - if (iotlb && vfio_devices_all_running_and_mig_active(container)) {
>> - cpu_physical_memory_set_dirty_range(iotlb->translated_addr, size,
>> - tcg_enabled() ?
>> DIRTY_CLIENTS_ALL :
>> - DIRTY_CLIENTS_NOCODE);
>> + if (need_dirty_sync) {
>> + ret = vfio_get_dirty_bitmap(container, iova, size,
>> + iotlb->translated_addr);
>> + if (ret) {
>> + return ret;
>> + }
>> }
>>
>> return 0;
>> @@ -1579,6 +1590,58 @@ static void
>> vfio_listener_log_global_stop(MemoryListener *listener)
>> }
>> }
>>
>> +static int vfio_device_dma_logging_report(VFIODevice *vbasedev, hwaddr iova,
>> + hwaddr size, void *bitmap)
>> +{
>> + uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) +
>> + sizeof(struct
>> vfio_device_feature_dma_logging_report),
>> + sizeof(__aligned_u64))] = {};
>> + struct vfio_device_feature *feature = (struct vfio_device_feature *)buf;
>> + struct vfio_device_feature_dma_logging_report *report =
>> + (struct vfio_device_feature_dma_logging_report *)feature->data;
>> +
>> + report->iova = iova;
>> + report->length = size;
>> + report->page_size = qemu_real_host_page_size();
>> + report->bitmap = (__aligned_u64)bitmap;
>> +
>> + feature->argsz = sizeof(buf);
>> + feature->flags =
>> + VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT;
>
> Nit, the series is inconsistent between initializing flags as above and
> as:
>
> feature->flags = VFIO_DEVICE_FEATURE_GET;
> feature->flags |= VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT;
>
> My personal preference would be more like:
>
> feature->flags = VFIO_DEVICE_FEATURE_GET |
> VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT;
>
I'll switch the overall style to the latter.
> Thanks,
> Alex
>
>> +
>> + if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) {
>> + return -errno;
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +static int vfio_devices_query_dirty_bitmap(VFIOContainer *container,
>> + VFIOBitmap *vbmap, hwaddr iova,
>> + hwaddr size)
>> +{
>> + VFIODevice *vbasedev;
>> + VFIOGroup *group;
>> + int ret;
>> +
>> + QLIST_FOREACH(group, &container->group_list, container_next) {
>> + QLIST_FOREACH(vbasedev, &group->device_list, next) {
>> + ret = vfio_device_dma_logging_report(vbasedev, iova, size,
>> + vbmap->bitmap);
>> + if (ret) {
>> + error_report("%s: Failed to get DMA logging report, iova: "
>> + "0x%" HWADDR_PRIx ", size: 0x%" HWADDR_PRIx
>> + ", err: %d (%s)",
>> + vbasedev->name, iova, size, ret,
>> strerror(-ret));
>> +
>> + return ret;
>> + }
>> + }
>> + }
>> +
>> + return 0;
>> +}
>> +
>> static int vfio_query_dirty_bitmap(VFIOContainer *container, VFIOBitmap
>> *vbmap,
>> hwaddr iova, hwaddr size)
>> {
>> @@ -1619,10 +1682,12 @@ static int vfio_query_dirty_bitmap(VFIOContainer
>> *container, VFIOBitmap *vbmap,
>> static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
>> uint64_t size, ram_addr_t ram_addr)
>> {
>> + bool all_device_dirty_tracking =
>> + vfio_devices_all_device_dirty_tracking(container);
>> VFIOBitmap vbmap;
>> int ret;
>>
>> - if (!container->dirty_pages_supported) {
>> + if (!container->dirty_pages_supported && !all_device_dirty_tracking) {
>> cpu_physical_memory_set_dirty_range(ram_addr, size,
>> tcg_enabled() ?
>> DIRTY_CLIENTS_ALL :
>> DIRTY_CLIENTS_NOCODE);
>> @@ -1634,7 +1699,12 @@ static int vfio_get_dirty_bitmap(VFIOContainer
>> *container, uint64_t iova,
>> return -errno;
>> }
>>
>> - ret = vfio_query_dirty_bitmap(container, &vbmap, iova, size);
>> + if (all_device_dirty_tracking) {
>> + ret = vfio_devices_query_dirty_bitmap(container, &vbmap, iova,
>> size);
>> + } else {
>> + ret = vfio_query_dirty_bitmap(container, &vbmap, iova, size);
>> + }
>> +
>> if (ret) {
>> goto out;
>> }
>