On Thu, 2015-06-18 at 21:37 +1000, Alexey Kardashevskiy wrote:
> This moves SPAPR bits to a separate file to avoid pollution of x86 code.
>
> This enables spapr-vfio on CONFIG_SOFTMMU (not CONFIG_PSERIES) as
> the config options are only visible in makefiles and not in the source code,
> so there is no obvious way of implementing stubs if hw/vfio/spapr.c is
> not compiled.
>
> This is a mechanical patch.
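
(On the stub question: the config symbols are at least visible in the
makefiles, so a no-op stub file doesn't look impossible. A sketch only --
spapr-stub.c is hypothetical, and the build rule would need whatever
inverse-CONFIG_PSERIES condition the makefiles can express:

    /* hw/vfio/spapr-stub.c: built only when hw/vfio/spapr.c is not */
    #include "hw/vfio/vfio-common.h"
    #include "qemu/error-report.h"

    void spapr_memory_listener_register(VFIOContainer *container)
    {
        /* Built without sPAPR IOMMU support; this path should not be
         * reached since the sPAPR container branch is PPC-only. */
        error_report("vfio: sPAPR IOMMU support not compiled in");
    }

That said, the question below stands regardless of how it's compiled.)
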
Why does spapr code always need to be pulled out of common code, with
private interfaces exposed to be called in ad hoc ways?  Doesn't that say
something about a lack of design in the implementation?  Why not also pull
out type1 support and perhaps create an interface between vfio common code
and vfio iommu code?  (A rough sketch of the sort of interface I mean
follows the patch below.)

> Signed-off-by: Alexey Kardashevskiy <a...@ozlabs.ru>
> Reviewed-by: David Gibson <da...@gibson.dropbear.id.au>
> ---
>  hw/vfio/Makefile.objs         |   1 +
>  hw/vfio/common.c              | 137 ++-----------------
>  hw/vfio/spapr.c               | 229 ++++++++++++++++++++++++++++++++++++++++++
>  include/hw/vfio/vfio-common.h |  13 +++
>  4 files changed, 249 insertions(+), 131 deletions(-)
>  create mode 100644 hw/vfio/spapr.c
>
> diff --git a/hw/vfio/Makefile.objs b/hw/vfio/Makefile.objs
> index d540c9d..0dd99ed 100644
> --- a/hw/vfio/Makefile.objs
> +++ b/hw/vfio/Makefile.objs
> @@ -3,4 +3,5 @@ obj-$(CONFIG_SOFTMMU) += common.o
>  obj-$(CONFIG_PCI) += pci.o
>  obj-$(CONFIG_SOFTMMU) += platform.o
>  obj-$(CONFIG_SOFTMMU) += calxeda-xgmac.o
> +obj-$(CONFIG_SOFTMMU) += spapr.o
>  endif
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index b1045da..3e4c685 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -190,8 +190,8 @@ const MemoryRegionOps vfio_region_ops = {
>  /*
>   * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86
>   */
> -static int vfio_dma_unmap(VFIOContainer *container,
> -                          hwaddr iova, ram_addr_t size)
> +int vfio_dma_unmap(VFIOContainer *container,
> +                   hwaddr iova, ram_addr_t size)
>  {
>      struct vfio_iommu_type1_dma_unmap unmap = {
>          .argsz = sizeof(unmap),
> @@ -208,8 +208,8 @@ static int vfio_dma_unmap(VFIOContainer *container,
>      return 0;
>  }
>
> -static int vfio_dma_map(VFIOContainer *container, hwaddr iova,
> -                        ram_addr_t size, void *vaddr, bool readonly)
> +int vfio_dma_map(VFIOContainer *container, hwaddr iova,
> +                 ram_addr_t size, void *vaddr, bool readonly)
>  {
>      struct vfio_iommu_type1_dma_map map = {
>          .argsz = sizeof(map),
> @@ -238,7 +238,7 @@ static int vfio_dma_map(VFIOContainer *container, hwaddr iova,
>      return -errno;
>  }
>
> -static bool vfio_listener_skipped_section(MemoryRegionSection *section)
> +bool vfio_listener_skipped_section(MemoryRegionSection *section)
>  {
>      return (!memory_region_is_ram(section->mr) &&
>              !memory_region_is_iommu(section->mr)) ||
> @@ -251,67 +251,6 @@ static bool vfio_listener_skipped_section(MemoryRegionSection *section)
>             section->offset_within_address_space & (1ULL << 63);
>  }
>
> -static void vfio_iommu_map_notify(Notifier *n, void *data)
> -{
> -    VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
> -    VFIOContainer *container = giommu->container;
> -    IOMMUTLBEntry *iotlb = data;
> -    MemoryRegion *mr;
> -    hwaddr xlat;
> -    hwaddr len = iotlb->addr_mask + 1;
> -    void *vaddr;
> -    int ret;
> -
> -    trace_vfio_iommu_map_notify(iotlb->iova,
> -                                iotlb->iova + iotlb->addr_mask);
> -
> -    /*
> -     * The IOMMU TLB entry we have just covers translation through
> -     * this IOMMU to its immediate target. We need to translate
> -     * it the rest of the way through to memory.
> -     */
> -    rcu_read_lock();
> -    mr = address_space_translate(&address_space_memory,
> -                                 iotlb->translated_addr,
> -                                 &xlat, &len, iotlb->perm & IOMMU_WO);
> -    if (!memory_region_is_ram(mr)) {
> -        error_report("iommu map to non memory area %"HWADDR_PRIx"",
> -                     xlat);
> -        goto out;
> -    }
> -    /*
> -     * Translation truncates length to the IOMMU page size,
> -     * check that it did not truncate too much.
> -     */
> -    if (len & iotlb->addr_mask) {
> -        error_report("iommu has granularity incompatible with target AS");
> -        goto out;
> -    }
> -
> -    if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
> -        vaddr = memory_region_get_ram_ptr(mr) + xlat;
> -        ret = vfio_dma_map(container, iotlb->iova,
> -                           iotlb->addr_mask + 1, vaddr,
> -                           !(iotlb->perm & IOMMU_WO) || mr->readonly);
> -        if (ret) {
> -            error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
> -                         "0x%"HWADDR_PRIx", %p) = %d (%m)",
> -                         container, iotlb->iova,
> -                         iotlb->addr_mask + 1, vaddr, ret);
> -        }
> -    } else {
> -        ret = vfio_dma_unmap(container, iotlb->iova, iotlb->addr_mask + 1);
> -        if (ret) {
> -            error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
> -                         "0x%"HWADDR_PRIx") = %d (%m)",
> -                         container, iotlb->iova,
> -                         iotlb->addr_mask + 1, ret);
> -        }
> -    }
> -out:
> -    rcu_read_unlock();
> -}
> -
>  static void vfio_listener_region_add(MemoryListener *listener,
>                                       MemoryRegionSection *section)
>  {
> @@ -347,45 +286,6 @@ static void vfio_listener_region_add(MemoryListener *listener,
>
>      memory_region_ref(section->mr);
>
> -    if (memory_region_is_iommu(section->mr)) {
> -        VFIOGuestIOMMU *giommu;
> -
> -        trace_vfio_listener_region_add_iommu(iova,
> -                    int128_get64(int128_sub(llend, int128_one())));
> -        /*
> -         * FIXME: We should do some checking to see if the
> -         * capabilities of the host VFIO IOMMU are adequate to model
> -         * the guest IOMMU
> -         *
> -         * FIXME: For VFIO iommu types which have KVM acceleration to
> -         * avoid bouncing all map/unmaps through qemu this way, this
> -         * would be the right place to wire that up (tell the KVM
> -         * device emulation the VFIO iommu handles to use).
> -         */
> -        /*
> -         * This assumes that the guest IOMMU is empty of
> -         * mappings at this point.
> -         *
> -         * One way of doing this is:
> -         * 1. Avoid sharing IOMMUs between emulated devices or different
> -         *    IOMMU groups.
> -         * 2. Implement VFIO_IOMMU_ENABLE in the host kernel to fail if
> -         *    there are some mappings in IOMMU.
> -         *
> -         * VFIO on SPAPR does that. Other IOMMU models may do that different,
> -         * they must make sure there are no existing mappings or
> -         * loop through existing mappings to map them into VFIO.
> -         */
> -        giommu = g_malloc0(sizeof(*giommu));
> -        giommu->iommu = section->mr;
> -        giommu->container = container;
> -        giommu->n.notify = vfio_iommu_map_notify;
> -        QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next);
> -        memory_region_register_iommu_notifier(giommu->iommu, &giommu->n);
> -
> -        return;
> -    }
> -
>      /* Here we assume that memory_region_is_ram(section->mr)==true */
>
>      end = int128_get64(llend);
> @@ -438,27 +338,6 @@ static void vfio_listener_region_del(MemoryListener *listener,
>          return;
>      }
>
> -    if (memory_region_is_iommu(section->mr)) {
> -        VFIOGuestIOMMU *giommu;
> -
> -        QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) {
> -            if (giommu->iommu == section->mr) {
> -                memory_region_unregister_iommu_notifier(&giommu->n);
> -                QLIST_REMOVE(giommu, giommu_next);
> -                g_free(giommu);
> -                break;
> -            }
> -        }
> -
> -        /*
> -         * FIXME: We assume the one big unmap below is adequate to
> -         * remove any individual page mappings in the IOMMU which
> -         * might have been copied into VFIO. This works for a page table
> -         * based IOMMU where a big unmap flattens a large range of IO-PTEs.
> -         * That may not be true for all IOMMU types.
> -         */
> -    }
> -
>      iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
>      end = (section->offset_within_address_space + int128_get64(section->size)) &
>            TARGET_PAGE_MASK;
> @@ -724,11 +603,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as)
>              goto free_container_exit;
>          }
>
> -        container->iommu_data.type1.listener = vfio_memory_listener;
> -        container->iommu_data.release = vfio_listener_release;
> -
> -        memory_listener_register(&container->iommu_data.type1.listener,
> -                                 container->space->as);
> +        spapr_memory_listener_register(container);
>
>      } else {
>          error_report("vfio: No available IOMMU models");
> diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c
> new file mode 100644
> index 0000000..f03fab1
> --- /dev/null
> +++ b/hw/vfio/spapr.c
> @@ -0,0 +1,229 @@
> +/*
> + * QEMU sPAPR VFIO IOMMU
> + *
> + * Copyright (c) 2015 Alexey Kardashevskiy, IBM Corporation.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License,
> + * or (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include "hw/vfio/vfio-common.h"
> +#include "qemu/error-report.h"
> +#include "trace.h"
> +
> +static void vfio_iommu_map_notify(Notifier *n, void *data)
> +{
> +    VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
> +    VFIOContainer *container = giommu->container;
> +    IOMMUTLBEntry *iotlb = data;
> +    MemoryRegion *mr;
> +    hwaddr xlat;
> +    hwaddr len = iotlb->addr_mask + 1;
> +    void *vaddr;
> +    int ret;
> +
> +    trace_vfio_iommu_map_notify(iotlb->iova,
> +                                iotlb->iova + iotlb->addr_mask);
> +
> +    /*
> +     * The IOMMU TLB entry we have just covers translation through
> +     * this IOMMU to its immediate target. We need to translate
> +     * it the rest of the way through to memory.
> +     */
> +    rcu_read_lock();
> +    mr = address_space_translate(&address_space_memory,
> +                                 iotlb->translated_addr,
> +                                 &xlat, &len, iotlb->perm & IOMMU_WO);
> +    if (!memory_region_is_ram(mr)) {
> +        error_report("iommu map to non memory area %"HWADDR_PRIx"",
> +                     xlat);
> +        goto out;
> +    }
> +    /*
> +     * Translation truncates length to the IOMMU page size,
> +     * check that it did not truncate too much.
> +     */
> +    if (len & iotlb->addr_mask) {
> +        error_report("iommu has granularity incompatible with target AS");
> +        goto out;
> +    }
> +
> +    if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
> +        vaddr = memory_region_get_ram_ptr(mr) + xlat;
> +        ret = vfio_dma_map(container, iotlb->iova,
> +                           iotlb->addr_mask + 1, vaddr,
> +                           !(iotlb->perm & IOMMU_WO) || mr->readonly);
> +        if (ret) {
> +            error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
> +                         "0x%"HWADDR_PRIx", %p) = %d (%m)",
> +                         container, iotlb->iova,
> +                         iotlb->addr_mask + 1, vaddr, ret);
> +        }
> +    } else {
> +        ret = vfio_dma_unmap(container, iotlb->iova, iotlb->addr_mask + 1);
> +        if (ret) {
> +            error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
> +                         "0x%"HWADDR_PRIx") = %d (%m)",
> +                         container, iotlb->iova,
> +                         iotlb->addr_mask + 1, ret);
> +        }
> +    }
> +out:
> +    rcu_read_unlock();
> +}
> +
> +static void vfio_spapr_listener_region_add(MemoryListener *listener,
> +                                           MemoryRegionSection *section)
> +{
> +    VFIOContainer *container = container_of(listener, VFIOContainer,
> +                                            iommu_data.spapr.listener);
> +    hwaddr iova;
> +    Int128 llend;
> +    VFIOGuestIOMMU *giommu;
> +
> +    if (vfio_listener_skipped_section(section)) {
> +        trace_vfio_listener_region_add_skip(
> +                section->offset_within_address_space,
> +                section->offset_within_address_space +
> +                int128_get64(int128_sub(section->size, int128_one())));
> +        return;
> +    }
> +
> +    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
> +                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
> +        error_report("%s received unaligned region", __func__);
> +        return;
> +    }
> +
> +    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
> +    llend = int128_make64(section->offset_within_address_space);
> +    llend = int128_add(llend, section->size);
> +    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));
> +
> +    if (int128_ge(int128_make64(iova), llend)) {
> +        return;
> +    }
> +
> +    memory_region_ref(section->mr);
> +
> +    trace_vfio_listener_region_add_iommu(iova,
> +            int128_get64(int128_sub(llend, int128_one())));
> +    /*
> +     * FIXME: We should do some checking to see if the
> +     * capabilities of the host VFIO IOMMU are adequate to model
> +     * the guest IOMMU
> +     *
> +     * FIXME: For VFIO iommu types which have KVM acceleration to
> +     * avoid bouncing all map/unmaps through qemu this way, this
> +     * would be the right place to wire that up (tell the KVM
> +     * device emulation the VFIO iommu handles to use).
> +     */
> +    /*
> +     * This assumes that the guest IOMMU is empty of
> +     * mappings at this point.
> +     *
> +     * One way of doing this is:
> +     * 1. Avoid sharing IOMMUs between emulated devices or different
> +     *    IOMMU groups.
> +     * 2. Implement VFIO_IOMMU_ENABLE in the host kernel to fail if
> +     *    there are some mappings in IOMMU.
> +     *
> +     * VFIO on SPAPR does that. Other IOMMU models may do that different,
> +     * they must make sure there are no existing mappings or
> +     * loop through existing mappings to map them into VFIO.
> +     */
> +    giommu = g_malloc0(sizeof(*giommu));
> +    giommu->iommu = section->mr;
> +    giommu->container = container;
> +    giommu->n.notify = vfio_iommu_map_notify;
> +    QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next);
> +    memory_region_register_iommu_notifier(giommu->iommu, &giommu->n);
> +}
> +
> +static void vfio_spapr_listener_region_del(MemoryListener *listener,
> +                                           MemoryRegionSection *section)
> +{
> +    VFIOContainer *container = container_of(listener, VFIOContainer,
> +                                            iommu_data.spapr.listener);
> +    hwaddr iova, end;
> +    int ret;
> +    VFIOGuestIOMMU *giommu;
> +
> +    if (vfio_listener_skipped_section(section)) {
> +        trace_vfio_listener_region_del_skip(
> +                section->offset_within_address_space,
> +                section->offset_within_address_space +
> +                int128_get64(int128_sub(section->size, int128_one())));
> +        return;
> +    }
> +
> +    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
> +                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
> +        error_report("%s received unaligned region", __func__);
> +        return;
> +    }
> +
> +    QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) {
> +        if (giommu->iommu == section->mr) {
> +            memory_region_unregister_iommu_notifier(&giommu->n);
> +            QLIST_REMOVE(giommu, giommu_next);
> +            g_free(giommu);
> +            break;
> +        }
> +    }
> +
> +    /*
> +     * FIXME: We assume the one big unmap below is adequate to
> +     * remove any individual page mappings in the IOMMU which
> +     * might have been copied into VFIO. This works for a page table
> +     * based IOMMU where a big unmap flattens a large range of IO-PTEs.
> +     * That may not be true for all IOMMU types.
> +     */
> +
> +    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
> +    end = (section->offset_within_address_space + int128_get64(section->size)) &
> +          TARGET_PAGE_MASK;
> +
> +    if (iova >= end) {
> +        return;
> +    }
> +
> +    trace_vfio_listener_region_del(iova, end - 1);
> +
> +    ret = vfio_dma_unmap(container, iova, end - iova);
> +    memory_region_unref(section->mr);
> +    if (ret) {
> +        error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
> +                     "0x%"HWADDR_PRIx") = %d (%m)",
> +                     container, iova, end - iova, ret);
> +    }
> +}
> +
> +static const MemoryListener vfio_spapr_memory_listener = {
> +    .region_add = vfio_spapr_listener_region_add,
> +    .region_del = vfio_spapr_listener_region_del,
> +};
> +
> +static void vfio_spapr_listener_release(VFIOContainer *container)
> +{
> +    memory_listener_unregister(&container->iommu_data.spapr.listener);
> +}
> +
> +void spapr_memory_listener_register(VFIOContainer *container)
> +{
> +    container->iommu_data.spapr.listener = vfio_spapr_memory_listener;
> +    container->iommu_data.release = vfio_spapr_listener_release;
> +
> +    memory_listener_register(&container->iommu_data.spapr.listener,
> +                             container->space->as);
> +}
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index 59a321d..d513a2f 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -70,6 +70,10 @@ typedef struct VFIOType1 {
>      bool initialized;
>  } VFIOType1;
>
> +typedef struct VFIOSPAPR {
> +    MemoryListener listener;
> +} VFIOSPAPR;
> +
>  typedef struct VFIOContainer {
>      VFIOAddressSpace *space;
>      int fd; /* /dev/vfio/vfio, empowered by the attached groups */
> @@ -77,6 +81,7 @@ typedef struct VFIOContainer {
>          /* enable abstraction to support various iommu backends */
>          union {
>              VFIOType1 type1;
> +            VFIOSPAPR spapr;
>          };
>          void (*release)(struct VFIOContainer *);
>      } iommu_data;
> @@ -146,4 +151,12 @@ extern const MemoryRegionOps vfio_region_ops;
>  extern QLIST_HEAD(vfio_group_head, VFIOGroup) vfio_group_list;
>  extern QLIST_HEAD(vfio_as_head, VFIOAddressSpace) vfio_address_spaces;
>
> +extern int vfio_dma_map(VFIOContainer *container, hwaddr iova,
> +                        ram_addr_t size, void *vaddr, bool readonly);
> +extern int vfio_dma_unmap(VFIOContainer *container,
> +                          hwaddr iova, ram_addr_t size);
> +bool vfio_listener_skipped_section(MemoryRegionSection *section);
> +
> +extern void spapr_memory_listener_register(VFIOContainer *container);
> +
>  #endif /* !HW_VFIO_VFIO_COMMON_H */
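
To make the earlier point concrete: what I'd rather see is common code
calling through a small backend interface, with type1 given the same
treatment as spapr. A rough sketch only -- none of these names exist in
the tree today:

    typedef struct VFIOIOMMUOps {
        /* Register the backend's MemoryListener(s) on the container's
         * address space; returns 0 or -errno. */
        int (*setup)(VFIOContainer *container);
        /* Undo setup(): unregister listeners, free backend state. */
        void (*release)(VFIOContainer *container);
    } VFIOIOMMUOps;

    static const VFIOIOMMUOps vfio_type1_ops = {
        .setup   = vfio_type1_setup,       /* hypothetical */
        .release = vfio_type1_release,     /* hypothetical */
    };

    static const VFIOIOMMUOps vfio_spapr_ops = {
        .setup   = vfio_spapr_setup,       /* hypothetical */
        .release = vfio_spapr_release,     /* hypothetical */
    };

vfio_connect_container() would then only select a backend rather than
open-coding each IOMMU type, along these lines:

    if (ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU)) {
        container->ops = &vfio_type1_ops;
    } else if (ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_SPAPR_TCE_IOMMU)) {
        container->ops = &vfio_spapr_ops;
    }
    ...
    ret = container->ops->setup(container);

With that in place there's no need to export spapr-specific entry points
from common code; vfio_dma_map()/vfio_dma_unmap() become the defined
interface the iommu backends call back into, rather than ad hoc one-off
hooks.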