On Tue, Oct 30, 2012 at 11:47 AM, Avi Kivity <a...@redhat.com> wrote:
> Add a new memory region type that translates addresses it is given,
> then forwards them to a target address space.  This is similar to
> an alias, except that the mapping is more flexible than a linear
> translation and truncation, and also less efficient since the
> translation happens at runtime.
>
> The implementation uses an AddressSpace mapping the target region to
> avoid hierarchical dispatch all the way to the resolved region; only
> iommu regions are looked up dynamically.
>
> Signed-off-by: Avi Kivity <a...@redhat.com>
> ---
>  exec.c   |  28 ++++++++++++++---
>  memory.c | 106 
> +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  memory.h |  45 +++++++++++++++++++++++++++
>  3 files changed, 175 insertions(+), 4 deletions(-)
>
> diff --git a/exec.c b/exec.c
> index deee8ec..c3914fc 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -3507,6 +3507,7 @@ void cpu_physical_memory_write_rom(hwaddr addr,
>
>  typedef struct {
>      void *buffer;
> +    AddressSpace *as;
>      hwaddr addr;
>      hwaddr len;
>  } BounceBuffer;
> @@ -3572,23 +3573,42 @@ void *address_space_map(AddressSpace *as,
>      ram_addr_t raddr = RAM_ADDR_MAX;
>      ram_addr_t rlen;
>      void *ret;
> +    IOMMUTLBEntry iotlb;
> +    hwaddr xlat;
> +    AddressSpace *as_xlat;
>
>      while (len > 0) {
> +        xlat = addr;
> +        as_xlat = as;
>          page = addr & TARGET_PAGE_MASK;
>          l = (page + TARGET_PAGE_SIZE) - addr;
>          if (l > len)
>              l = len;
>          section = phys_page_find(d, page >> TARGET_PAGE_BITS);
>
> +        while (section->mr->iommu_ops) {
> +            iotlb = section->mr->iommu_ops->translate(section->mr, xlat);
> +            if (iotlb.perm[is_write]) {
> +                xlat = ((iotlb.translated_addr & ~iotlb.addr_mask)
> +                        | (addr & iotlb.addr_mask));
> +                as_xlat = section->mr->iommu_target_as;
> +                l = (MIN(xlat + l - 1, xlat | iotlb.addr_mask) - xlat) + 1;
> +                section = phys_page_find(as_xlat->dispatch, xlat >> 
> TARGET_PAGE_BITS);
> +            } else {
> +                section = &phys_sections[phys_section_unassigned];
> +            }
> +        }
> +
>          if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
>              if (todo || bounce.buffer) {
>                  break;
>              }
>              bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, 
> TARGET_PAGE_SIZE);
> -            bounce.addr = addr;
> +            bounce.addr = xlat;
> +            bounce.as = as_xlat;
>              bounce.len = l;
>              if (!is_write) {
> -                address_space_read(as, addr, bounce.buffer, l);
> +                address_space_read(as_xlat, xlat, bounce.buffer, l);
>              }
>
>              *plen = l;
> @@ -3596,7 +3616,7 @@ void *address_space_map(AddressSpace *as,
>          }
>          if (!todo) {
>              raddr = memory_region_get_ram_addr(section->mr)
> -                + memory_region_section_addr(section, addr);
> +                + memory_region_section_addr(section, xlat);
>          }
>
>          len -= l;
> @@ -3635,7 +3655,7 @@ void address_space_unmap(AddressSpace *as, void 
> *buffer, hwaddr len,
>          return;
>      }
>      if (is_write) {
> -        address_space_write(as, bounce.addr, bounce.buffer, access_len);
> +        address_space_write(bounce.as, bounce.addr, bounce.buffer, 
> access_len);
>      }
>      qemu_vfree(bounce.buffer);
>      bounce.buffer = NULL;
> diff --git a/memory.c b/memory.c
> index ae3552b..ba2d4a0 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -775,6 +775,12 @@ static void 
> memory_region_destructor_rom_device(MemoryRegion *mr)
>      qemu_ram_free(mr->ram_addr & TARGET_PAGE_MASK);
>  }
>
> +static void memory_region_destructor_iommu(MemoryRegion *mr)
> +{
> +    address_space_destroy(mr->iommu_target_as);
> +    g_free(mr->iommu_target_as);
> +}
> +
>  static bool memory_region_wrong_endianness(MemoryRegion *mr)
>  {
>  #ifdef TARGET_WORDS_BIGENDIAN
> @@ -789,6 +795,7 @@ void memory_region_init(MemoryRegion *mr,
>                          uint64_t size)
>  {
>      mr->ops = NULL;
> +    mr->iommu_ops = NULL;
>      mr->parent = NULL;
>      mr->size = int128_make64(size);
>      if (size == UINT64_MAX) {
> @@ -980,6 +987,100 @@ void memory_region_init_rom_device(MemoryRegion *mr,
>      mr->ram_addr = qemu_ram_alloc(size, mr);
>  }
>
> +static void memory_region_iommu_rw(MemoryRegion *iommu, hwaddr addr,
> +                                   uint8_t *buf, unsigned len, bool is_write)
> +{
> +    IOMMUTLBEntry tlb;
> +    unsigned clen;
> +    hwaddr xlat;
> +
> +    while (len) {
> +        tlb = iommu->iommu_ops->translate(iommu, addr);
> +        clen = (MIN(addr | tlb.addr_mask, addr + len - 1) - addr) + 1;
> +        if (tlb.perm[is_write]) {
> +            xlat = (tlb.translated_addr & ~tlb.addr_mask) | (addr & 
> tlb.addr_mask);
> +            address_space_rw(iommu->iommu_target_as, xlat, buf, clen, 
> is_write);
> +        } else {
> +            if (!is_write) {
> +                memset(buf, 0xff, clen);
> +            }
> +        }
> +        buf += clen;
> +        addr += clen;
> +        len -= clen;
> +    }
> +}
> +
> +static uint64_t memory_region_iommu_read(void *opaque, hwaddr addr,
> +                                         unsigned size)
> +{
> +    MemoryRegion *iommu = opaque;
> +    union {
> +        uint8_t buf[8];
> +        uint8_t u8;
> +        uint16_t u16;
> +        uint32_t u32;
> +        uint64_t u64;
> +    } ret;
> +
> +    memory_region_iommu_rw(iommu, addr, ret.buf, size, false);
> +    switch (size) {
> +    case 1: return ret.u8;
> +    case 2: return ret.u16;
> +    case 4: return ret.u32;
> +    case 8: return ret.u64;
> +    default: abort();
> +    }
> +}
> +
> +static void memory_region_iommu_write(void *opaque, hwaddr addr,
> +                                      uint64_t data, unsigned size)
> +{
> +    MemoryRegion *iommu = opaque;
> +    union {
> +        uint8_t buf[8];
> +        uint8_t u8;
> +        uint16_t u16;
> +        uint32_t u32;
> +        uint64_t u64;
> +    } in;
> +
> +    switch (size) {
> +    case 1: in.u8 = data; break;
> +    case 2: in.u16 = data; break;
> +    case 4: in.u32 = data; break;
> +    case 8: in.u64 = data; break;
> +    default: abort();
> +    }
> +    memory_region_iommu_rw(iommu, addr, in.buf, size, true);
> +}
> +
> +static MemoryRegionOps memory_region_iommu_ops = {
> +    .read = memory_region_iommu_read,
> +    .write = memory_region_iommu_write,
> +#ifdef HOST_BIGENDIAN
> +    .endianness = DEVICE_BIG_ENDIAN,

Why couple this with host endianness? I'd expect the IOMMU to operate at
the target bus endianness, for example LE for PCI on a PPC guest.

> +#else
> +    .endianness = DEVICE_LITTLE_ENDIAN,
> +#endif
> +};
> +
> +void memory_region_init_iommu(MemoryRegion *mr,
> +                              MemoryRegionIOMMUOps *ops,
> +                              MemoryRegion *target,
> +                              const char *name,
> +                              uint64_t size)
> +{
> +    memory_region_init(mr, name, size);
> +    mr->ops = &memory_region_iommu_ops;
> +    mr->iommu_ops = ops,
> +    mr->opaque = mr;
> +    mr->terminates = true;  /* then re-forwards */
> +    mr->destructor = memory_region_destructor_iommu;
> +    mr->iommu_target_as = g_new(AddressSpace, 1);
> +    address_space_init(mr->iommu_target_as, target);
> +}
> +
>  static uint64_t invalid_read(void *opaque, hwaddr addr,
>                               unsigned size)
>  {
> @@ -1054,6 +1155,11 @@ bool memory_region_is_rom(MemoryRegion *mr)
>      return mr->ram && mr->readonly;
>  }
>
> +bool memory_region_is_iommu(MemoryRegion *mr)
> +{
> +    return mr->iommu_ops;
> +}
> +
>  void memory_region_set_log(MemoryRegion *mr, bool log, unsigned client)
>  {
>      uint8_t mask = 1 << client;
> diff --git a/memory.h b/memory.h
> index 9462bfd..47362c9 100644
> --- a/memory.h
> +++ b/memory.h
> @@ -113,12 +113,28 @@ struct MemoryRegionOps {
>      const MemoryRegionMmio old_mmio;
>  };
>
> +typedef struct IOMMUTLBEntry IOMMUTLBEntry;
> +typedef struct MemoryRegionIOMMUOps MemoryRegionIOMMUOps;
> +
> +struct IOMMUTLBEntry {
> +    hwaddr device_addr;
> +    hwaddr translated_addr;
> +    hwaddr addr_mask;  /* 0xfff = 4k translation */
> +    bool perm[2]; /* read/write permissions */

Please document that index 1 is write and index 0 is read.

> +};
> +
> +struct MemoryRegionIOMMUOps {
> +    /* Returns a TLB entry that contains a given address. */
> +    IOMMUTLBEntry (*translate)(MemoryRegion *iommu, hwaddr addr);

Maybe the MemoryRegion could be const to declare that the translation
does not change mappings.

> +};
> +
>  typedef struct CoalescedMemoryRange CoalescedMemoryRange;
>  typedef struct MemoryRegionIoeventfd MemoryRegionIoeventfd;
>
>  struct MemoryRegion {
>      /* All fields are private - violators will be prosecuted */
>      const MemoryRegionOps *ops;
> +    const MemoryRegionIOMMUOps *iommu_ops;
>      void *opaque;
>      MemoryRegion *parent;
>      Int128 size;
> @@ -145,6 +161,7 @@ struct MemoryRegion {
>      uint8_t dirty_log_mask;
>      unsigned ioeventfd_nb;
>      MemoryRegionIoeventfd *ioeventfds;
> +    struct AddressSpace *iommu_target_as;
>  };
>
>  struct MemoryRegionPortio {
> @@ -334,6 +351,25 @@ void memory_region_init_rom_device(MemoryRegion *mr,
>  void memory_region_init_reservation(MemoryRegion *mr,
>                                      const char *name,
>                                      uint64_t size);
> +
> +/**
> + * memory_region_init_iommu: Initialize a memory region that translates 
> addresses
> + *
> + * An IOMMU region translates addresses and forwards accesses to a target 
> memory region.
> + *
> + * @mr: the #MemoryRegion to be initialized
> + * @ops: a function that translates addresses into the @target region
> + * @target: a #MemoryRegion that will be used to satisfy accesses to 
> translated
> + *          addresses
> + * @name: used for debugging; not visible to the user or ABI
> + * @size: size of the region.
> + */
> +void memory_region_init_iommu(MemoryRegion *mr,
> +                              MemoryRegionIOMMUOps *ops,
> +                              MemoryRegion *target,
> +                              const char *name,
> +                              uint64_t size);
> +
>  /**
>   * memory_region_destroy: Destroy a memory region and reclaim all resources.
>   *
> @@ -373,6 +409,15 @@ static inline bool memory_region_is_romd(MemoryRegion 
> *mr)
>  }
>
>  /**
> + * memory_region_is_iommu: check whether a memory region is an iommu
> + *
> + * Returns %true if a memory region is an iommu.
> + *
> + * @mr: the memory region being queried
> + */
> +bool memory_region_is_iommu(MemoryRegion *mr);
> +
> +/**
>   * memory_region_name: get a memory region's name
>   *
>   * Returns the string that was used to initialize the memory region.
> --
> 1.7.12
>

Reply via email to