The existing memory listener is called on RAM or PCI address space which implies potentially different page size. Instead of guessing what page size should be used, this replaces a single IOMMU memory listener by two, one per supported IOMMU type; listener callbacks call the existing helpers with a known page size.
For now, Type1 uses qemu_real_host_page_mask, sPAPR uses the page size from IOMMU. As memory_region_iommu_get_page_sizes() asserts on non-IOMMU regions, this adds memory_region_is_iommu() to the SPAPR IOMMU listener to skip non IOMMU regions (which is an MSIX window) which duplicates vfio_listener_skipped_section() a bit. Signed-off-by: Alexey Kardashevskiy <a...@ozlabs.ru> --- hw/vfio/common.c | 95 ++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 76 insertions(+), 19 deletions(-) diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 85ee9b0..aad41e1 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -312,11 +312,11 @@ out: rcu_read_unlock(); } -static void vfio_listener_region_add(MemoryListener *listener, +static void vfio_listener_region_add(VFIOContainer *container, + hwaddr page_mask, + MemoryListener *listener, MemoryRegionSection *section) { - VFIOContainer *container = container_of(listener, VFIOContainer, - iommu_data.type1.listener); hwaddr iova, end; Int128 llend; void *vaddr; @@ -330,16 +330,16 @@ static void vfio_listener_region_add(MemoryListener *listener, return; } - if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != - (section->offset_within_region & ~TARGET_PAGE_MASK))) { + if (unlikely((section->offset_within_address_space & ~page_mask) != + (section->offset_within_region & ~page_mask))) { error_report("%s received unaligned region", __func__); return; } - iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); + iova = ROUND_UP(section->offset_within_address_space, ~page_mask + 1); llend = int128_make64(section->offset_within_address_space); llend = int128_add(llend, section->size); - llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK)); + llend = int128_and(llend, int128_exts64(page_mask)); if (int128_ge(int128_make64(iova), llend)) { return; @@ -416,11 +416,11 @@ static void vfio_listener_region_add(MemoryListener *listener, } } -static void vfio_listener_region_del(MemoryListener *listener, +static void vfio_listener_region_del(VFIOContainer *container, + hwaddr page_mask, + MemoryListener *listener, MemoryRegionSection *section) { - VFIOContainer *container = container_of(listener, VFIOContainer, - iommu_data.type1.listener); hwaddr iova, end; int ret; @@ -432,8 +432,8 @@ static void vfio_listener_region_del(MemoryListener *listener, return; } - if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != - (section->offset_within_region & ~TARGET_PAGE_MASK))) { + if (unlikely((section->offset_within_address_space & ~page_mask) != + (section->offset_within_region & ~page_mask))) { error_report("%s received unaligned region", __func__); return; } @@ -459,9 +459,9 @@ static void vfio_listener_region_del(MemoryListener *listener, */ } - iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); + iova = ROUND_UP(section->offset_within_address_space, ~page_mask + 1); end = (section->offset_within_address_space + int128_get64(section->size)) & - TARGET_PAGE_MASK; + page_mask; if (iova >= end) { return; @@ -478,9 +478,66 @@ static void vfio_listener_region_del(MemoryListener *listener, } } -static const MemoryListener vfio_memory_listener = { - .region_add = vfio_listener_region_add, - .region_del = vfio_listener_region_del, +static void vfio_type1_iommu_listener_region_add(MemoryListener *listener, + MemoryRegionSection *section) +{ + VFIOContainer *container = container_of(listener, VFIOContainer, + iommu_data.type1.listener); + + vfio_listener_region_add(container, qemu_real_host_page_mask, listener, + section); +} + + +static void vfio_type1_iommu_listener_region_del(MemoryListener *listener, + MemoryRegionSection *section) +{ + VFIOContainer *container = container_of(listener, VFIOContainer, + iommu_data.type1.listener); + + vfio_listener_region_del(container, qemu_real_host_page_mask, listener, + section); +} + +static const MemoryListener vfio_type1_iommu_listener = { + .region_add = vfio_type1_iommu_listener_region_add, + .region_del = vfio_type1_iommu_listener_region_del, +}; + +static void vfio_spapr_iommu_listener_region_add(MemoryListener *listener, + MemoryRegionSection *section) +{ + VFIOContainer *container; + hwaddr page_mask; + + if (!memory_region_is_iommu(section->mr)) { + return; + } + container = container_of(listener, VFIOContainer, + iommu_data.type1.listener); + page_mask = ~(memory_region_iommu_get_page_sizes(section->mr) - 1); + vfio_listener_region_add(container, page_mask, listener, section); +} + + +static void vfio_spapr_iommu_listener_region_del(MemoryListener *listener, + MemoryRegionSection *section) +{ + VFIOContainer *container; + hwaddr page_mask; + + if (!memory_region_is_iommu(section->mr)) { + return; + } + container = container_of(listener, VFIOContainer, + iommu_data.type1.listener); + page_mask = ~(memory_region_iommu_get_page_sizes(section->mr) - 1); + vfio_listener_region_del(container, page_mask, listener, section); +} + +static const MemoryListener vfio_spapr_iommu_listener = { + .region_add = vfio_spapr_iommu_listener_region_add, + .region_del = vfio_spapr_iommu_listener_region_del, }; static void vfio_listener_release(VFIOContainer *container) @@ -684,7 +741,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as) goto free_container_exit; } - container->iommu_data.type1.listener = vfio_memory_listener; + container->iommu_data.type1.listener = vfio_type1_iommu_listener; container->iommu_data.release = vfio_listener_release; memory_listener_register(&container->iommu_data.type1.listener, @@ -724,7 +781,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as) goto free_container_exit; } - container->iommu_data.type1.listener = vfio_memory_listener; + container->iommu_data.type1.listener = vfio_spapr_iommu_listener; container->iommu_data.release = vfio_listener_release; memory_listener_register(&container->iommu_data.type1.listener, -- 2.4.0.rc3.8.gfb3e7d5