On Thu, 2015-07-16 at 11:26 +1000, Alexey Kardashevskiy wrote: > On 07/16/2015 04:26 AM, Alex Williamson wrote: > > On Tue, 2015-07-14 at 22:21 +1000, Alexey Kardashevskiy wrote: > >> The existing memory listener is called on RAM or PCI address space > >> which implies potentially different page size. Instead of guessing > >> what page size should be used, this replaces a single IOMMU memory > >> listener by two, one per supported IOMMU type; listener callbacks > >> call the existing helpers with a known page size. > >> > >> For now, Type1 uses qemu_real_host_page_mask, sPAPR uses the page size > >> from IOMMU. > >> > >> As memory_region_iommu_get_page_sizes() asserts on non-IOMMU regions, > >> this adds memory_region_is_iommu() to the SPAPR IOMMU listener to skip > >> non IOMMU regions (which is an MSIX window) which duplicates > >> vfio_listener_skipped_section() a bit. > >> > >> Signed-off-by: Alexey Kardashevskiy <a...@ozlabs.ru> > >> --- > >> hw/vfio/common.c | 95 > >> ++++++++++++++++++++++++++++++++++++++++++++------------ > >> 1 file changed, 76 insertions(+), 19 deletions(-) > >> > >> diff --git a/hw/vfio/common.c b/hw/vfio/common.c > >> index 85ee9b0..aad41e1 100644 > >> --- a/hw/vfio/common.c > >> +++ b/hw/vfio/common.c > >> @@ -312,11 +312,11 @@ out: > >> rcu_read_unlock(); > >> } > >> > >> -static void vfio_listener_region_add(MemoryListener *listener, > >> +static void vfio_listener_region_add(VFIOContainer *container, > >> + hwaddr page_mask, > > > > Should memory_region_iommu_get_page_sizes() return a hwaddr? > > > I do not think so, memory.c uses uint64_t for masks. 
> > > > >> + MemoryListener *listener, > >> MemoryRegionSection *section) > >> { > >> - VFIOContainer *container = container_of(listener, VFIOContainer, > >> - iommu_data.type1.listener); > >> hwaddr iova, end; > >> Int128 llend; > >> void *vaddr; > >> @@ -330,16 +330,16 @@ static void vfio_listener_region_add(MemoryListener > >> *listener, > >> return; > >> } > >> > >> - if (unlikely((section->offset_within_address_space & > >> ~TARGET_PAGE_MASK) != > >> - (section->offset_within_region & ~TARGET_PAGE_MASK))) { > >> + if (unlikely((section->offset_within_address_space & ~page_mask) != > >> + (section->offset_within_region & ~page_mask))) { > >> error_report("%s received unaligned region", __func__); > >> return; > >> } > >> > >> - iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); > >> + iova = ROUND_UP(section->offset_within_address_space, ~page_mask + 1); > >> llend = int128_make64(section->offset_within_address_space); > >> llend = int128_add(llend, section->size); > >> - llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK)); > >> + llend = int128_and(llend, int128_exts64(page_mask)); > >> > >> if (int128_ge(int128_make64(iova), llend)) { > >> return; > >> @@ -416,11 +416,11 @@ static void vfio_listener_region_add(MemoryListener > >> *listener, > >> } > >> } > >> > >> -static void vfio_listener_region_del(MemoryListener *listener, > >> +static void vfio_listener_region_del(VFIOContainer *container, > >> + hwaddr page_mask, > >> + MemoryListener *listener, > >> MemoryRegionSection *section) > >> { > >> - VFIOContainer *container = container_of(listener, VFIOContainer, > >> - iommu_data.type1.listener); > >> hwaddr iova, end; > >> int ret; > >> > >> @@ -432,8 +432,8 @@ static void vfio_listener_region_del(MemoryListener > >> *listener, > >> return; > >> } > >> > >> - if (unlikely((section->offset_within_address_space & > >> ~TARGET_PAGE_MASK) != > >> - (section->offset_within_region & ~TARGET_PAGE_MASK))) { > >> + if 
(unlikely((section->offset_within_address_space & ~page_mask) != > >> + (section->offset_within_region & ~page_mask))) { > >> error_report("%s received unaligned region", __func__); > >> return; > >> } > >> @@ -459,9 +459,9 @@ static void vfio_listener_region_del(MemoryListener > >> *listener, > >> */ > >> } > >> > >> - iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); > >> + iova = ROUND_UP(section->offset_within_address_space, ~page_mask + 1); > >> end = (section->offset_within_address_space + > >> int128_get64(section->size)) & > >> - TARGET_PAGE_MASK; > >> + page_mask; > >> > >> if (iova >= end) { > >> return; > >> @@ -478,9 +478,66 @@ static void vfio_listener_region_del(MemoryListener > >> *listener, > >> } > >> } > >> > >> -static const MemoryListener vfio_memory_listener = { > >> - .region_add = vfio_listener_region_add, > >> - .region_del = vfio_listener_region_del, > >> +static void vfio_type1_iommu_listener_region_add(MemoryListener *listener, > >> + MemoryRegionSection > >> *section) > >> +{ > >> + VFIOContainer *container = container_of(listener, VFIOContainer, > >> + iommu_data.type1.listener); > >> + > >> + vfio_listener_region_add(container, qemu_real_host_page_mask, > >> listener, > >> + section); > >> +} > >> + > >> + > >> +static void vfio_type1_iommu_listener_region_del(MemoryListener *listener, > >> + MemoryRegionSection > >> *section) > >> +{ > >> + VFIOContainer *container = container_of(listener, VFIOContainer, > >> + iommu_data.type1.listener); > >> + > >> + vfio_listener_region_del(container, qemu_real_host_page_mask, > >> listener, > >> + section); > >> +} > >> + > >> +static const MemoryListener vfio_type1_iommu_listener = { > >> + .region_add = vfio_type1_iommu_listener_region_add, > >> + .region_del = vfio_type1_iommu_listener_region_del, > >> +}; > >> + > >> +static void vfio_spapr_iommu_listener_region_add(MemoryListener *listener, > >> + MemoryRegionSection *section) > >> +{ > >> + VFIOContainer *container; > >> + 
hwaddr page_mask; > >> + > >> + if (!memory_region_is_iommu(section->mr)) { > >> + return; > >> + } > >> + container = container_of(listener, VFIOContainer, > >> + iommu_data.type1.listener); > >> + page_mask = ~(memory_region_iommu_get_page_sizes(section->mr) - 1); > >> + vfio_listener_region_add(container, page_mask, listener, section); > >> +} > >> + > >> + > >> +static void vfio_spapr_iommu_listener_region_del(MemoryListener *listener, > >> + MemoryRegionSection *section) > >> +{ > >> + VFIOContainer *container; > >> + hwaddr page_mask; > >> + > >> + if (!memory_region_is_iommu(section->mr)) { > >> + return; > >> + } > >> + container = container_of(listener, VFIOContainer, > >> + iommu_data.type1.listener); > >> + page_mask = ~(memory_region_iommu_get_page_sizes(section->mr) - 1); > >> + vfio_listener_region_del(container, page_mask, listener, section); > >> +} > >> + > >> +static const MemoryListener vfio_spapr_iommu_listener = { > >> + .region_add = vfio_spapr_iommu_listener_region_add, > >> + .region_del = vfio_spapr_iommu_listener_region_del, > >> }; > > > > > > Rather than creating all these wrappers, why don't we create a structure > > that gives us callbacks we can use for a shared function? If we had: > > > > struct VFIOMemoryListener { > > struct MemoryListener listener; > > bool (*filter)(MemoryRegionSection *section); > > hwaddr (*page_size)(MemoryRegionSection *section); > > VFIOContainer *container; > > } > > > > Then there would be no reason for you to have separate wrappers for > > spapr. VFIOType1 would have a single VFIOMemoryListener, you could have > > an array of two. > > Sorry, I am missing the point here... > > I cannot just have an array of these - I will have to store an address > spaces per a listener in a container to implement filter() so I'd rather > store just an address space and not add filter() at all. And I will still
You're not storing an address space now. > need "memory: Add reporting of supported page sizes" - or there is no way Just like you do now. > to implement it all. So I still end up having 6 callbacks in > hw/vfio/common.c. What does this win for us? Nothing you're suggesting adds any more callbacks. The current filter is vfio_listener_skipped_section(). The iommu filter is simply: bool vfio_iommu_listener_skipped_section(mr) { return vfio_listener_skipped_section(mr) || !memory_region_is_iommu(mr); } Notice how I didn't have to call it spapr because it's probably what any guest iommu is going to do... The v2 spapr filter is: bool vfio_nodump_listener_skipped_section(mr) { return vfio_listener_skipped_section(mr) || memory_region_is_skip_dump(mr); } Notice again that we can use generic functions and not make everything spapr specific. The page_size callbacks are similarly trivial, the iommu one obviously calls memory_region_iommu_get_page_sizes, the other one uses ~qemu_real_host_page_mask. Again, nothing spapr specific. The container of course points back to the container and now vfio_listener_region_add() simply does: VFIOMemoryListener *vlistener = container_of(listener, VFIOMemoryListener, listener); VFIOContainer *container = vlistener->container; hwaddr page_mask = vlistener->page_size(mr); if (vlistener->filter && vlistener->filter(mr)) { return; } ... And suddenly we get to piece together a handful of _generic_ helper functions into a VFIOMemoryListener and we're not adding a new pair of wrappers for everything and duplicating trivial setup. 
Thanks, Alex > >> > >> static void vfio_listener_release(VFIOContainer *container) > >> @@ -684,7 +741,7 @@ static int vfio_connect_container(VFIOGroup *group, > >> AddressSpace *as) > >> goto free_container_exit; > >> } > >> > >> - container->iommu_data.type1.listener = vfio_memory_listener; > >> + container->iommu_data.type1.listener = vfio_type1_iommu_listener; > >> container->iommu_data.release = vfio_listener_release; > >> > >> memory_listener_register(&container->iommu_data.type1.listener, > >> @@ -724,7 +781,7 @@ static int vfio_connect_container(VFIOGroup *group, > >> AddressSpace *as) > >> goto free_container_exit; > >> } > >> > >> - container->iommu_data.type1.listener = vfio_memory_listener; > >> + container->iommu_data.type1.listener = vfio_spapr_iommu_listener; > >> container->iommu_data.release = vfio_listener_release; > >> > >> memory_listener_register(&container->iommu_data.type1.listener, > > > > > > > >