Re: [PATCH v3 2/4] iommu: Implement common IOMMU ops for DMA mapping

2015-07-15 Thread Robin Murphy

Hi Catalin,

Thanks for the review.

On 14/07/15 18:16, Catalin Marinas wrote:

On Fri, Jul 10, 2015 at 08:19:33PM +0100, Robin Murphy wrote:

+/*
+ * IOVAs are IOMMU _input_ addresses, so there still exists the possibility
+ * for static bus translation between device output and IOMMU input (yuck).
+ */
+static inline dma_addr_t dev_dma_addr(struct device *dev, dma_addr_t addr)
+{
+   dma_addr_t offset = (dma_addr_t)dev->dma_pfn_offset << PAGE_SHIFT;
+
+   BUG_ON(addr < offset);
+   return addr - offset;
+}


Are these just theoretical, or do you expect to see some at some point? I
think the dma_limit in __alloc_iova() may also need to take the offset
into account (or just ignore them altogether for now).


Right now I'm not aware of any platform which has both DMA offsets and
an IOMMU on the same bus, and I would really hope it stays that way.
This is just extra complication from trying to cover every possibility,
and you're right about the __alloc_iova() oversight. I'll rip it out for
simplicity, and remain hopeful that nobody ever builds anything mad
enough to need it put back.
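
(For the record, honouring the offset would roughly mean raising the IOVA
limit by the same amount dev_dma_addr() later subtracts. A minimal sketch,
with a hypothetical helper name and assuming the dev->dma_pfn_offset
semantics above; not part of the series:)

/*
 * Hypothetical sketch only: the DMA mask bounds the device's *output*
 * addresses, while IOVAs are IOMMU *input* addresses sitting 'offset'
 * above them, so the IOVA limit would need raising accordingly.
 */
static u64 __dev_iova_limit(struct device *dev, u64 dma_mask)
{
	u64 offset = (u64)dev->dma_pfn_offset << PAGE_SHIFT;

	/* Saturate rather than wrap if the mask is already near the top */
	if (dma_mask + offset < dma_mask)
		return U64_MAX;
	return dma_mask + offset;
}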



+
+/**
+ * dma_direction_to_prot - Translate DMA API directions to IOMMU API page flags
+ * @dir: Direction of DMA transfer
+ * @coherent: Is the DMA master cache-coherent?
+ *
+ * Return: corresponding IOMMU API page protection flags
+ */
+int dma_direction_to_prot(enum dma_data_direction dir, bool coherent)
+{
+   int prot = coherent ? IOMMU_CACHE : 0;
+
+   switch (dir) {
+   case DMA_BIDIRECTIONAL:
+   return prot | IOMMU_READ | IOMMU_WRITE;
+   case DMA_TO_DEVICE:
+   return prot | IOMMU_READ;
+   case DMA_FROM_DEVICE:
+   return prot | IOMMU_WRITE;
+   default:
+   return 0;
+   }
+}
+
+static struct iova *__alloc_iova(struct device *dev, size_t size, bool coherent)
+{
+   struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
+   struct iova_domain *iovad = domain->dma_api_cookie;
+   unsigned long shift = iova_shift(iovad);
+   unsigned long length = iova_align(iovad, size) >> shift;
+   u64 dma_limit = coherent ? dev->coherent_dma_mask : dma_get_mask(dev);


"coherent" and "coherent_dma_mask" have related meanings here. As I can
see in patch 3, the coherent information passed all the way to this
function states whether the device is cache coherent or not (and whether
cache maintenance is needed). The coherent_dma_mask refers to an
allocation mask for the dma_alloc_coherent() API but that doesn't
necessarily mean that the device is cache coherent. Similarly, dma_mask
is used for streaming DMA.

You can rename it to coherent_api or simply pass a u64 dma_mask
directly.


Bleh, it seems that at some point along the way I got confused and 
started mistakenly thinking the DMA masks were about the device's 
ability to issue coherent/non-coherent transactions. I'll clean up the 
mess...
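
(As a sketch of where the call sites presumably land once that's done,
assuming the bool turns into an explicit u64 mask as suggested; not the
actual patch:)

/* coherent path, e.g. iommu_dma_alloc(): bounded by the coherent mask */
iova = __alloc_iova(dev, size, dev->coherent_dma_mask);

/* streaming paths, e.g. iommu_dma_map_page()/map_sg(): the streaming mask */
iova = __alloc_iova(dev, len, dma_get_mask(dev));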



[...]

+/**
+ * iommu_dma_alloc - Allocate and map a buffer contiguous in IOVA space
+ * @dev: Device to allocate memory for. Must be a real device
+ *  attached to an iommu_dma_domain
+ * @size: Size of buffer in bytes
+ * @gfp: Allocation flags
+ * @prot: IOMMU mapping flags
+ * @coherent: Which dma_mask to base IOVA allocation on
+ * @handle: Out argument for allocated DMA handle
+ * @flush_page: Arch callback to flush a single page from caches as
+ * necessary. May be NULL for coherent allocations
+ *
+ * If @size is less than PAGE_SIZE, then a full CPU page will be allocated,
+ * but an IOMMU which supports smaller pages might not map the whole thing.
+ * For now, the buffer is unconditionally zeroed for compatibility
+ *
+ * Return: Array of struct page pointers describing the buffer,
+ *or NULL on failure.
+ */
+struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp,
+   int prot, bool coherent, dma_addr_t *handle,
+   void (*flush_page)(const void *, phys_addr_t))


So for this function, coherent should always be true since this is only
used with the coherent DMA API AFAICT.


Indeed.
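
(For context, the arm64 caller in patch 3 is only reached via the coherent
DMA API; paraphrasing rather than quoting it, the relevant fragment looks
roughly like the lines below, where flush_page stands for whatever arch
cache-flush callback that patch provides. So the IOVA allocation is always
bounded by the coherent mask, and cache-coherency only affects flushing
and page protections.)

/* Paraphrased sketch of the patch-3 caller, not a quote of it */
bool coherent = is_device_dma_coherent(dev);
int ioprot = dma_direction_to_prot(DMA_BIDIRECTIONAL, coherent);
struct page **pages;

/* Only ever called for dma_alloc_coherent(), hence 'true' here */
pages = iommu_dma_alloc(dev, size, gfp, ioprot, true, handle,
			coherent ? NULL : flush_page);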


+{
+   struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
+   struct iova_domain *iovad = domain->dma_api_cookie;
+   struct iova *iova;
+   struct page **pages;
+   struct sg_table sgt;
+   struct sg_mapping_iter miter;
+   dma_addr_t dma_addr;
+   unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+
+   *handle = DMA_ERROR_CODE;
+
+   pages = __iommu_dma_alloc_pages(count, gfp);
+   if (!pages)
+   return NULL;
+
+   iova = __alloc_iova(dev, size, coherent);


And here just __alloc_iova(dev, size, true);


In fact, everything it wanted dev for is now available at all the 
callsites, so I'll rejig the whole interface.
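
(A sketch of one possible rejigged shape, assuming the callers pass the
iova_domain and the relevant mask themselves; not the final code:)

static struct iova *__alloc_iova(struct iova_domain *iovad, size_t size,
		dma_addr_t dma_limit)
{
	unsigned long shift = iova_shift(iovad);
	unsigned long length = iova_align(iovad, size) >> shift;

	/* Size-aligned allocations keep mappings nicely block-alignable */
	return alloc_iova(iovad, length, dma_limit >> shift, true);
}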


Robin.


[...]

+dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
+   unsigned long offset, size_t size, int prot, bool coherent)
+{
+   dma_addr_t dma_addr;

Re: [PATCH v3 2/4] iommu: Implement common IOMMU ops for DMA mapping

2015-07-14 Thread Catalin Marinas
On Fri, Jul 10, 2015 at 08:19:33PM +0100, Robin Murphy wrote:
> +/*
> + * IOVAs are IOMMU _input_ addresses, so there still exists the possibility
> + * for static bus translation between device output and IOMMU input (yuck).
> + */
> +static inline dma_addr_t dev_dma_addr(struct device *dev, dma_addr_t addr)
> +{
> + dma_addr_t offset = (dma_addr_t)dev->dma_pfn_offset << PAGE_SHIFT;
> +
> + BUG_ON(addr < offset);
> + return addr - offset;
> +}

Are these just theoretical, or do you expect to see some at some point? I
think the dma_limit in __alloc_iova() may also need to take the offset
into account (or just ignore them altogether for now).

> +
> +/**
> + * dma_direction_to_prot - Translate DMA API directions to IOMMU API page flags
> + * @dir: Direction of DMA transfer
> + * @coherent: Is the DMA master cache-coherent?
> + *
> + * Return: corresponding IOMMU API page protection flags
> + */
> +int dma_direction_to_prot(enum dma_data_direction dir, bool coherent)
> +{
> + int prot = coherent ? IOMMU_CACHE : 0;
> +
> + switch (dir) {
> + case DMA_BIDIRECTIONAL:
> + return prot | IOMMU_READ | IOMMU_WRITE;
> + case DMA_TO_DEVICE:
> + return prot | IOMMU_READ;
> + case DMA_FROM_DEVICE:
> + return prot | IOMMU_WRITE;
> + default:
> + return 0;
> + }
> +}
> +
> +static struct iova *__alloc_iova(struct device *dev, size_t size, bool coherent)
> +{
> + struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
> + struct iova_domain *iovad = domain->dma_api_cookie;
> + unsigned long shift = iova_shift(iovad);
> + unsigned long length = iova_align(iovad, size) >> shift;
> + u64 dma_limit = coherent ? dev->coherent_dma_mask : dma_get_mask(dev);

"coherent" and "coherent_dma_mask" have related meanings here. As I can
see in patch 3, the coherent information passed all the way to this
function states whether the device is cache coherent or not (and whether
cache maintenance is needed). The coherent_dma_mask refers to an
allocation mask for the dma_alloc_coherent() API but that doesn't
necessarily mean that the device is cache coherent. Similarly, dma_mask
is used for streaming DMA.

You can rename it to coherent_api or simply pass a u64 dma_mask
directly.

[...]
> +/**
> + * iommu_dma_alloc - Allocate and map a buffer contiguous in IOVA space
> + * @dev: Device to allocate memory for. Must be a real device
> + *    attached to an iommu_dma_domain
> + * @size: Size of buffer in bytes
> + * @gfp: Allocation flags
> + * @prot: IOMMU mapping flags
> + * @coherent: Which dma_mask to base IOVA allocation on
> + * @handle: Out argument for allocated DMA handle
> + * @flush_page: Arch callback to flush a single page from caches as
> + *   necessary. May be NULL for coherent allocations
> + *
> + * If @size is less than PAGE_SIZE, then a full CPU page will be allocated,
> + * but an IOMMU which supports smaller pages might not map the whole thing.
> + * For now, the buffer is unconditionally zeroed for compatibility
> + *
> + * Return: Array of struct page pointers describing the buffer,
> + *  or NULL on failure.
> + */
> +struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp,
> + int prot, bool coherent, dma_addr_t *handle,
> + void (*flush_page)(const void *, phys_addr_t))

So for this function, coherent should always be true since this is only
used with the coherent DMA API AFAICT.

> +{
> + struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
> + struct iova_domain *iovad = domain->dma_api_cookie;
> + struct iova *iova;
> + struct page **pages;
> + struct sg_table sgt;
> + struct sg_mapping_iter miter;
> + dma_addr_t dma_addr;
> + unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
> +
> + *handle = DMA_ERROR_CODE;
> +
> + pages = __iommu_dma_alloc_pages(count, gfp);
> + if (!pages)
> + return NULL;
> +
> + iova = __alloc_iova(dev, size, coherent);

And here just __alloc_iova(dev, size, true);

[...]
> +dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
> + unsigned long offset, size_t size, int prot, bool coherent)
> +{
> + dma_addr_t dma_addr;
> + struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
> + struct iova_domain *iovad = domain->dma_api_cookie;
> + phys_addr_t phys = page_to_phys(page) + offset;
> + size_t iova_off = iova_offset(iovad, phys);
> + size_t len = iova_align(iovad, size + iova_off);
> + struct iova *iova = __alloc_iova(dev, len, coherent);

Here __alloc_iova(dev, len, false);

[...]
> +/*
> + * The DMA API client is passing in a scatterlist which could describe
> + * any old buffer layout, but the IOMMU API requires everything to be
> + * aligned to IOMMU pages. Hence the need for this complicated bit of
> + * impedance-matching, to be able to hand off a suitably-aligned list,
> + * but still preserv

Re: [PATCH v3 2/4] iommu: Implement common IOMMU ops for DMA mapping

2015-07-13 Thread Yong Wu
On Fri, 2015-07-10 at 20:19 +0100, Robin Murphy wrote:
> Taking inspiration from the existing arch/arm code, break out some
> generic functions to interface the DMA-API to the IOMMU-API. This will
> do the bulk of the heavy lifting for IOMMU-backed dma-mapping.
> 
> Signed-off-by: Robin Murphy 
> ---
>  drivers/iommu/Kconfig |   7 +
>  drivers/iommu/Makefile|   1 +
>  drivers/iommu/dma-iommu.c | 536 ++++++++++++++++++++++++++++++++++++++++++
>  include/linux/dma-iommu.h |  84 
>  include/linux/iommu.h |   1 +
>  5 files changed, 629 insertions(+)
>  create mode 100644 drivers/iommu/dma-iommu.c
>  create mode 100644 include/linux/dma-iommu.h
> 
> diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
> index f1fb1d3..efb0e66 100644
> --- a/drivers/iommu/Kconfig
> +++ b/drivers/iommu/Kconfig
> @@ -48,6 +48,13 @@ config OF_IOMMU
> def_bool y
> depends on OF && IOMMU_API
>  
> +# IOMMU-agnostic DMA-mapping layer
> +config IOMMU_DMA
> + bool
> + depends on NEED_SG_DMA_LENGTH
> + select IOMMU_API
> + select IOMMU_IOVA
> +
>  config FSL_PAMU
>   bool "Freescale IOMMU support"
>   depends on PPC32
> diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
> index c6dcc51..f465cfb 100644
> --- a/drivers/iommu/Makefile
> +++ b/drivers/iommu/Makefile
> @@ -1,6 +1,7 @@
>  obj-$(CONFIG_IOMMU_API) += iommu.o
>  obj-$(CONFIG_IOMMU_API) += iommu-traces.o
>  obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o
> +obj-$(CONFIG_IOMMU_DMA) += dma-iommu.o
>  obj-$(CONFIG_IOMMU_IO_PGTABLE) += io-pgtable.o
>  obj-$(CONFIG_IOMMU_IO_PGTABLE_LPAE) += io-pgtable-arm.o
>  obj-$(CONFIG_IOMMU_IOVA) += iova.o
[...]
> +/**
> + * iommu_dma_alloc - Allocate and map a buffer contiguous in IOVA space
> + * @dev: Device to allocate memory for. Must be a real device
> + *    attached to an iommu_dma_domain
> + * @size: Size of buffer in bytes
> + * @gfp: Allocation flags
> + * @prot: IOMMU mapping flags
> + * @coherent: Which dma_mask to base IOVA allocation on
> + * @handle: Out argument for allocated DMA handle
> + * @flush_page: Arch callback to flush a single page from caches as
> + *   necessary. May be NULL for coherent allocations
> + *
> + * If @size is less than PAGE_SIZE, then a full CPU page will be allocated,
> + * but an IOMMU which supports smaller pages might not map the whole thing.
> + * For now, the buffer is unconditionally zeroed for compatibility
> + *
> + * Return: Array of struct page pointers describing the buffer,
> + *  or NULL on failure.
> + */
> +struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp,
> + int prot, bool coherent, dma_addr_t *handle,
> + void (*flush_page)(const void *, phys_addr_t))
> +{
> + struct iommu_domain *domain = iommu_get_domain_for_dev(dev);

Compared with DMA-v2, "struct iommu_dma_domain" is deleted and the iommu
domain is now obtained from its iommu_group->domain, so I have to create
an iommu_group. And since struct iommu_group is defined in iommu.c, I
cannot assign group->domain directly.

Having checked, I have to use iommu_attach_group instead.
Then our code may look like this:
//
static int mtk_iommu_add_device(struct device *dev)
{
	***
	if (!dev->archdata.dma_ops)	/* Not an iommu client device */
		return -ENODEV;

	group = iommu_group_get(dev);
	if (!group)
		group = iommu_group_alloc();

	ret = iommu_group_add_device(group, dev);

	/* get the mtk_iommu_domain from the master iommu device */
	mtkdom = ;
	iommu_attach_group(&mtkdom->domain, group);	/* attach the iommu domain */
	iommu_group_put(group);
	return ret;
}

static int mtk_iommu_attach_device(struct iommu_domain *domain,
				   struct device *dev)
{
	struct mtk_iommu_domain *priv = to_mtk_domain(domain), *imudom;
	struct iommu_group *group;

	/*
	 * Reserve one iommu domain as the m4u domain which all the
	 * multimedia modules share, and free the others.
	 */
	if (!imudev->archdata.iommu)
		imudev->archdata.iommu = priv;
	else if (imudev->archdata.iommu != priv)
		iommu_domain_free(domain);

	group = iommu_group_get(dev);
	/*
	 * Return 0 when the device being attached comes from
	 * __iommu_attach_notifier: the iommu_group will be created in
	 * add_device after the mtk-iommu probe.
	 */
	if (!group)
		return 0;
	iommu_group_put(group);

	mtk_iommu_init_domain_context(priv);	/* init the pagetable */
	mtk_iommu_config(priv, dev, true);	/* config the iommu info */

	return 0;
}
//
Is this OK? I'm preparing the next patch along these lines; could you give
some suggestions about the flow?
Thanks very much.

> + struct iova_domain *iovad = domain->dma_api_cookie;
> + struct iova *iova;
> + struct page **pages;
> + struct sg_table sgt;
> + struct sg_mapping_iter miter;

[PATCH v3 2/4] iommu: Implement common IOMMU ops for DMA mapping

2015-07-10 Thread Robin Murphy
Taking inspiration from the existing arch/arm code, break out some
generic functions to interface the DMA-API to the IOMMU-API. This will
do the bulk of the heavy lifting for IOMMU-backed dma-mapping.

Signed-off-by: Robin Murphy 
---
 drivers/iommu/Kconfig |   7 +
 drivers/iommu/Makefile|   1 +
 drivers/iommu/dma-iommu.c | 536 ++
 include/linux/dma-iommu.h |  84 
 include/linux/iommu.h |   1 +
 5 files changed, 629 insertions(+)
 create mode 100644 drivers/iommu/dma-iommu.c
 create mode 100644 include/linux/dma-iommu.h

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index f1fb1d3..efb0e66 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -48,6 +48,13 @@ config OF_IOMMU
def_bool y
depends on OF && IOMMU_API
 
+# IOMMU-agnostic DMA-mapping layer
+config IOMMU_DMA
+   bool
+   depends on NEED_SG_DMA_LENGTH
+   select IOMMU_API
+   select IOMMU_IOVA
+
 config FSL_PAMU
bool "Freescale IOMMU support"
depends on PPC32
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index c6dcc51..f465cfb 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -1,6 +1,7 @@
 obj-$(CONFIG_IOMMU_API) += iommu.o
 obj-$(CONFIG_IOMMU_API) += iommu-traces.o
 obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o
+obj-$(CONFIG_IOMMU_DMA) += dma-iommu.o
 obj-$(CONFIG_IOMMU_IO_PGTABLE) += io-pgtable.o
 obj-$(CONFIG_IOMMU_IO_PGTABLE_LPAE) += io-pgtable-arm.o
 obj-$(CONFIG_IOMMU_IOVA) += iova.o
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
new file mode 100644
index 000..f4559c7
--- /dev/null
+++ b/drivers/iommu/dma-iommu.c
@@ -0,0 +1,536 @@
+/*
+ * A fairly generic DMA-API to IOMMU-API glue layer.
+ *
+ * Copyright (C) 2014-2015 ARM Ltd.
+ *
+ * based in part on arch/arm/mm/dma-mapping.c:
+ * Copyright (C) 2000-2004 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+int iommu_dma_init(void)
+{
+   return iommu_iova_cache_init();
+}
+
+/**
+ * iommu_get_dma_cookie - Acquire DMA-API resources for a domain
+ * @domain: IOMMU domain to prepare for DMA-API usage
+ *
+ * IOMMU drivers should normally call this from their domain_alloc
+ * callback when domain->type == IOMMU_DOMAIN_DMA.
+ */
+int iommu_get_dma_cookie(struct iommu_domain *domain)
+{
+   struct iova_domain *iovad;
+
+   if (domain->dma_api_cookie)
+   return -EEXIST;
+
+   iovad = kzalloc(sizeof(*iovad), GFP_KERNEL);
+   domain->dma_api_cookie = iovad;
+
+   return iovad ? 0 : -ENOMEM;
+}
+EXPORT_SYMBOL(iommu_get_dma_cookie);
+
+/**
+ * iommu_put_dma_cookie - Release a domain's DMA mapping resources
+ * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
+ *
+ * IOMMU drivers should normally call this from their domain_free callback.
+ */
+void iommu_put_dma_cookie(struct iommu_domain *domain)
+{
+   struct iova_domain *iovad = domain->dma_api_cookie;
+
+   if (!iovad)
+   return;
+
+   put_iova_domain(iovad);
+   kfree(iovad);
+   domain->dma_api_cookie = NULL;
+}
+EXPORT_SYMBOL(iommu_put_dma_cookie);
+
+/**
+ * iommu_dma_init_domain - Initialise a DMA mapping domain
+ * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
+ * @base: IOVA at which the mappable address space starts
+ * @size: Size of IOVA space
+ *
+ * @base and @size should be exact multiples of IOMMU page granularity to
+ * avoid rounding surprises. If necessary, we reserve the page at address 0
+ * to ensure it is an invalid IOVA.
+ */
+int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, u64 size)
+{
+   struct iova_domain *iovad = domain->dma_api_cookie;
+   unsigned long order, base_pfn, end_pfn;
+
+   if (!iovad)
+   return -ENODEV;
+
+   /* Use the smallest supported page size for IOVA granularity */
+   order = __ffs(domain->ops->pgsize_bitmap);
+   base_pfn = max_t(unsigned long, 1, base >> order);
+   end_pfn = (base + size - 1) >> order;
+
+   /* Check the domain allows at least some access to the device... */
+   if (domain->geometry.force_aperture) {
+   if (base > domain->geometry.aperture_end ||
+   base + size <= domain->geometry.aperture_start) {
+   pr_warn("specified DMA range outside IOMMU