On Tue, Oct 30, 2012 at 11:47 AM, Avi Kivity <a...@redhat.com> wrote: > Add a new memory region type that translates addresses it is given, > then forwards them to a target address space. This is similar to > an alias, except that the mapping is more flexible than a linear > translation and trucation, and also less efficient since the > translation happens at runtime. > > The implementation uses an AddressSpace mapping the target region to > avoid hierarchical dispatch all the way to the resolved region; only > iommu regions are looked up dynamically. > > Signed-off-by: Avi Kivity <a...@redhat.com> > --- > exec.c | 28 ++++++++++++++--- > memory.c | 106 > +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ > memory.h | 45 +++++++++++++++++++++++++++ > 3 files changed, 175 insertions(+), 4 deletions(-) > > diff --git a/exec.c b/exec.c > index deee8ec..c3914fc 100644 > --- a/exec.c > +++ b/exec.c > @@ -3507,6 +3507,7 @@ void cpu_physical_memory_write_rom(hwaddr addr, > > typedef struct { > void *buffer; > + AddressSpace *as; > hwaddr addr; > hwaddr len; > } BounceBuffer; > @@ -3572,23 +3573,42 @@ void *address_space_map(AddressSpace *as, > ram_addr_t raddr = RAM_ADDR_MAX; > ram_addr_t rlen; > void *ret; > + IOMMUTLBEntry iotlb; > + hwaddr xlat; > + AddressSpace *as_xlat; > > while (len > 0) { > + xlat = addr; > + as_xlat = as; > page = addr & TARGET_PAGE_MASK; > l = (page + TARGET_PAGE_SIZE) - addr; > if (l > len) > l = len; > section = phys_page_find(d, page >> TARGET_PAGE_BITS); > > + while (section->mr->iommu_ops) { > + iotlb = section->mr->iommu_ops->translate(section->mr, xlat); > + if (iotlb.perm[is_write]) { > + xlat = ((iotlb.translated_addr & ~iotlb.addr_mask) > + | (addr & iotlb.addr_mask)); > + as_xlat = section->mr->iommu_target_as; > + l = (MIN(xlat + l - 1, xlat | iotlb.addr_mask) - xlat) + 1; > + section = phys_page_find(as_xlat->dispatch, xlat >> > TARGET_PAGE_BITS); > + } else { > + section = &phys_sections[phys_section_unassigned]; > + } > + } > + > if (!(memory_region_is_ram(section->mr) && !section->readonly)) { > if (todo || bounce.buffer) { > break; > } > bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, > TARGET_PAGE_SIZE); > - bounce.addr = addr; > + bounce.addr = xlat; > + bounce.as = as_xlat; > bounce.len = l; > if (!is_write) { > - address_space_read(as, addr, bounce.buffer, l); > + address_space_read(as_xlat, xlat, bounce.buffer, l); > } > > *plen = l; > @@ -3596,7 +3616,7 @@ void *address_space_map(AddressSpace *as, > } > if (!todo) { > raddr = memory_region_get_ram_addr(section->mr) > - + memory_region_section_addr(section, addr); > + + memory_region_section_addr(section, xlat); > } > > len -= l; > @@ -3635,7 +3655,7 @@ void address_space_unmap(AddressSpace *as, void > *buffer, hwaddr len, > return; > } > if (is_write) { > - address_space_write(as, bounce.addr, bounce.buffer, access_len); > + address_space_write(bounce.as, bounce.addr, bounce.buffer, > access_len); > } > qemu_vfree(bounce.buffer); > bounce.buffer = NULL; > diff --git a/memory.c b/memory.c > index ae3552b..ba2d4a0 100644 > --- a/memory.c > +++ b/memory.c > @@ -775,6 +775,12 @@ static void > memory_region_destructor_rom_device(MemoryRegion *mr) > qemu_ram_free(mr->ram_addr & TARGET_PAGE_MASK); > } > > +static void memory_region_destructor_iommu(MemoryRegion *mr) > +{ > + address_space_destroy(mr->iommu_target_as); > + g_free(mr->iommu_target_as); > +} > + > static bool memory_region_wrong_endianness(MemoryRegion *mr) > { > #ifdef TARGET_WORDS_BIGENDIAN > @@ -789,6 +795,7 @@ void memory_region_init(MemoryRegion *mr, > uint64_t size) > { > mr->ops = NULL; > + mr->iommu_ops = NULL; > mr->parent = NULL; > mr->size = int128_make64(size); > if (size == UINT64_MAX) { > @@ -980,6 +987,100 @@ void memory_region_init_rom_device(MemoryRegion *mr, > mr->ram_addr = qemu_ram_alloc(size, mr); > } > > +static void memory_region_iommu_rw(MemoryRegion *iommu, hwaddr addr, > + uint8_t *buf, unsigned len, bool is_write) > +{ > + IOMMUTLBEntry tlb; > + unsigned clen; > + hwaddr xlat; > + > + while (len) { > + tlb = iommu->iommu_ops->translate(iommu, addr); > + clen = (MIN(addr | tlb.addr_mask, addr + len - 1) - addr) + 1; > + if (tlb.perm[is_write]) { > + xlat = (tlb.translated_addr & ~tlb.addr_mask) | (addr & > tlb.addr_mask); > + address_space_rw(iommu->iommu_target_as, xlat, buf, clen, > is_write); > + } else { > + if (!is_write) { > + memset(buf, 0xff, clen); > + } > + } > + buf += clen; > + addr += clen; > + len -= clen; > + } > +} > + > +static uint64_t memory_region_iommu_read(void *opaque, hwaddr addr, > + unsigned size) > +{ > + MemoryRegion *iommu = opaque; > + union { > + uint8_t buf[8]; > + uint8_t u8; > + uint16_t u16; > + uint32_t u32; > + uint64_t u64; > + } ret; > + > + memory_region_iommu_rw(iommu, addr, ret.buf, size, false); > + switch (size) { > + case 1: return ret.u8; > + case 2: return ret.u16; > + case 4: return ret.u32; > + case 8: return ret.u64; > + default: abort(); > + } > +} > + > +static void memory_region_iommu_write(void *opaque, hwaddr addr, > + uint64_t data, unsigned size) > +{ > + MemoryRegion *iommu = opaque; > + union { > + uint8_t buf[8]; > + uint8_t u8; > + uint16_t u16; > + uint32_t u32; > + uint64_t u64; > + } in; > + > + switch (size) { > + case 1: in.u8 = data; break; > + case 2: in.u16 = data; break; > + case 4: in.u32 = data; break; > + case 8: in.u64 = data; break; > + default: abort(); > + } > + memory_region_iommu_rw(iommu, addr, in.buf, size, true); > +} > + > +static MemoryRegionOps memory_region_iommu_ops = { > + .read = memory_region_iommu_read, > + .write = memory_region_iommu_write, > +#ifdef HOST_BIGENDIAN > + .endianness = DEVICE_BIG_ENDIAN,
Why couple this with host endianness? I'd expect IOMMU to operate at target bus endianness, for example LE for PCI on PPC guest. > +#else > + .endianness = DEVICE_LITTLE_ENDIAN, > +#endif > +}; > + > +void memory_region_init_iommu(MemoryRegion *mr, > + MemoryRegionIOMMUOps *ops, > + MemoryRegion *target, > + const char *name, > + uint64_t size) > +{ > + memory_region_init(mr, name, size); > + mr->ops = &memory_region_iommu_ops; > + mr->iommu_ops = ops, > + mr->opaque = mr; > + mr->terminates = true; /* then re-forwards */ > + mr->destructor = memory_region_destructor_iommu; > + mr->iommu_target_as = g_new(AddressSpace, 1); > + address_space_init(mr->iommu_target_as, target); > +} > + > static uint64_t invalid_read(void *opaque, hwaddr addr, > unsigned size) > { > @@ -1054,6 +1155,11 @@ bool memory_region_is_rom(MemoryRegion *mr) > return mr->ram && mr->readonly; > } > > +bool memory_region_is_iommu(MemoryRegion *mr) > +{ > + return mr->iommu_ops; > +} > + > void memory_region_set_log(MemoryRegion *mr, bool log, unsigned client) > { > uint8_t mask = 1 << client; > diff --git a/memory.h b/memory.h > index 9462bfd..47362c9 100644 > --- a/memory.h > +++ b/memory.h > @@ -113,12 +113,28 @@ struct MemoryRegionOps { > const MemoryRegionMmio old_mmio; > }; > > +typedef struct IOMMUTLBEntry IOMMUTLBEntry; > +typedef struct MemoryRegionIOMMUOps MemoryRegionIOMMUOps; > + > +struct IOMMUTLBEntry { > + hwaddr device_addr; > + hwaddr translated_addr; > + hwaddr addr_mask; /* 0xfff = 4k translation */ > + bool perm[2]; /* read/write permissions */ Please document that bit 1 is write and 0 read. > +}; > + > +struct MemoryRegionIOMMUOps { > + /* Returns a TLB entry that contains a given address. */ > + IOMMUTLBEntry (*translate)(MemoryRegion *iommu, hwaddr addr); Maybe the MemoryRegion could be const to declare that the translation does not change mappings. > +}; > + > typedef struct CoalescedMemoryRange CoalescedMemoryRange; > typedef struct MemoryRegionIoeventfd MemoryRegionIoeventfd; > > struct MemoryRegion { > /* All fields are private - violators will be prosecuted */ > const MemoryRegionOps *ops; > + const MemoryRegionIOMMUOps *iommu_ops; > void *opaque; > MemoryRegion *parent; > Int128 size; > @@ -145,6 +161,7 @@ struct MemoryRegion { > uint8_t dirty_log_mask; > unsigned ioeventfd_nb; > MemoryRegionIoeventfd *ioeventfds; > + struct AddressSpace *iommu_target_as; > }; > > struct MemoryRegionPortio { > @@ -334,6 +351,25 @@ void memory_region_init_rom_device(MemoryRegion *mr, > void memory_region_init_reservation(MemoryRegion *mr, > const char *name, > uint64_t size); > + > +/** > + * memory_region_init_iommu: Initialize a memory region that translates > addresses > + * > + * An IOMMU region translates addresses and forwards accesses to a target > memory region. > + * > + * @mr: the #MemoryRegion to be initialized > + * @ops: a function that translates addresses into the @target region > + * @target: a #MemoryRegion that will be used to satisfy accesses to > translated > + * addresses > + * @name: used for debugging; not visible to the user or ABI > + * @size: size of the region. > + */ > +void memory_region_init_iommu(MemoryRegion *mr, > + MemoryRegionIOMMUOps *ops, > + MemoryRegion *target, > + const char *name, > + uint64_t size); > + > /** > * memory_region_destroy: Destroy a memory region and reclaim all resources. > * > @@ -373,6 +409,15 @@ static inline bool memory_region_is_romd(MemoryRegion > *mr) > } > > /** > + * memory_region_is_ram: check whether a memory region is an iommu > + * > + * Returns %true is a memory region is an iommu. > + * > + * @mr: the memory region being queried > + */ > +bool memory_region_is_iommu(MemoryRegion *mr); > + > +/** > * memory_region_name: get a memory region's name > * > * Returns the string that was used to initialize the memory region. > -- > 1.7.12 >