Re: [Xen-devel] [PATCH 07/11] swiotlb-xen: provide a single page-coherent.h header

2019-08-16 Thread Christoph Hellwig
On Fri, Aug 16, 2019 at 11:40:43PM +0100, Julien Grall wrote:
> I am not sure I agree with this rename. The implementation of the helpers
> is very Arm specific, as it assumes Dom0 is 1:1 mapped.
>
> This was necessary due to the lack of an IOMMU on Arm platforms back then,
> but it is now a pain to get rid of on newer platforms...

So if you look at the final version of the header after the whole
series, what assumes a 1:1 mapping?  It all just is

	if (pfn_valid())
		local cache sync;
	else
		call into the arch code;

Are you concerned that the local cache sync might have to be split
up more for a non-1:1 map in that case?  We could just move
the xen_dma_* routines into the arch instead of __xen_dma, but it
really helps to have a common interface header.
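
A minimal sketch of that shape, for illustration only (helper names as used
elsewhere in this series; the local branch assumes the dma address is also a
valid physical address, i.e. exactly the 1:1 case under discussion):

	static inline void xen_dma_sync_single_for_device(struct device *dev,
			dma_addr_t handle, size_t size, enum dma_data_direction dir)
	{
		if (pfn_valid(PFN_DOWN(handle)))
			/* local page: plain cache maintenance */
			arch_sync_dma_for_device(dev, handle, size, dir);
		else
			/* foreign page: call into the arch code */
			__xen_dma_sync_single_for_device(dev, handle, size, dir);
	}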


[PATCH v2] iommu/amd: Override wrong IVRS IOAPIC on Raven Ridge systems

2019-08-16 Thread Kai-Heng Feng
Raven Ridge systems may have a malfunctioning touchpad or hang at boot if
an incorrect IVRS IOAPIC entry is provided by the BIOS.

Users have already found correct "ivrs_ioapic=" values; let's put them inside
the kernel to work around the buggy BIOS.

BugLink: https://bugs.launchpad.net/bugs/1795292
BugLink: https://bugs.launchpad.net/bugs/1837688
Signed-off-by: Kai-Heng Feng 
---
v2:
Split the quirk to another file.

 drivers/iommu/Makefile   |  2 +-
 drivers/iommu/amd_iommu.h| 14 +
 drivers/iommu/amd_iommu_init.c   |  5 +-
 drivers/iommu/amd_iommu_quirks.c | 90 
 4 files changed, 109 insertions(+), 2 deletions(-)
 create mode 100644 drivers/iommu/amd_iommu.h
 create mode 100644 drivers/iommu/amd_iommu_quirks.c

diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index f13f36ae1af6..c6a277e69848 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -10,7 +10,7 @@ obj-$(CONFIG_IOMMU_IO_PGTABLE_LPAE) += io-pgtable-arm.o
 obj-$(CONFIG_IOMMU_IOVA) += iova.o
 obj-$(CONFIG_OF_IOMMU) += of_iommu.o
 obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o
-obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o
+obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o amd_iommu_quirks.o
 obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += amd_iommu_debugfs.o
 obj-$(CONFIG_AMD_IOMMU_V2) += amd_iommu_v2.o
 obj-$(CONFIG_ARM_SMMU) += arm-smmu.o
diff --git a/drivers/iommu/amd_iommu.h b/drivers/iommu/amd_iommu.h
new file mode 100644
index ..12d540d9b59b
--- /dev/null
+++ b/drivers/iommu/amd_iommu.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef AMD_IOMMU_H
+#define AMD_IOMMU_H
+
+int __init add_special_device(u8 type, u8 id, u16 *devid, bool cmd_line);
+
+#ifdef CONFIG_DMI
+void amd_iommu_apply_ivrs_quirks(void);
+#else
+static inline void amd_iommu_apply_ivrs_quirks(void) { }
+#endif
+
+#endif
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 4413aa67000e..568c52317757 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -32,6 +32,7 @@
 #include 
 
 #include 
+#include "amd_iommu.h"
 #include "amd_iommu_proto.h"
 #include "amd_iommu_types.h"
 #include "irq_remapping.h"
@@ -1002,7 +1003,7 @@ static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu,
set_iommu_for_device(iommu, devid);
 }
 
-static int __init add_special_device(u8 type, u8 id, u16 *devid, bool cmd_line)
+int __init add_special_device(u8 type, u8 id, u16 *devid, bool cmd_line)
 {
struct devid_map *entry;
struct list_head *list;
@@ -1153,6 +1154,8 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
if (ret)
return ret;
 
+   amd_iommu_apply_ivrs_quirks();
+
/*
 * First save the recommended feature enable bits from ACPI
 */
diff --git a/drivers/iommu/amd_iommu_quirks.c b/drivers/iommu/amd_iommu_quirks.c
new file mode 100644
index ..14181f0f5c2a
--- /dev/null
+++ b/drivers/iommu/amd_iommu_quirks.c
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+/*
+ * Quirks for AMD IOMMU
+ *
+ * Copyright (C) 2019 Kai-Heng Feng 
+ */
+
+#include 
+
+#include "amd_iommu.h"
+
+#define IVHD_SPECIAL_IOAPIC	1
+
+struct ivrs_quirk_entry {
+   u8 id;
+   u16 devid;
+};
+
+enum {
+   DELL_INSPIRON_7375 = 0,
+   DELL_LATITUDE_5495,
+   LENOVO_IDEAPAD_330S_15ARR,
+};
+
+static const struct ivrs_quirk_entry ivrs_ioapic_quirks[][3] __initconst = {
+   /* ivrs_ioapic[4]=00:14.0 ivrs_ioapic[5]=00:00.2 */
+   [DELL_INSPIRON_7375] = {
+   { .id = 4, .devid = 0xa0 },
+   { .id = 5, .devid = 0x2 },
+   {}
+   },
+   /* ivrs_ioapic[4]=00:14.0 */
+   [DELL_LATITUDE_5495] = {
+   { .id = 4, .devid = 0xa0 },
+   {}
+   },
+   /* ivrs_ioapic[32]=00:14.0 */
+   [LENOVO_IDEAPAD_330S_15ARR] = {
+   { .id = 32, .devid = 0xa0 },
+   {}
+   },
+   {}
+};
+
+static int __init ivrs_ioapic_quirk_cb(const struct dmi_system_id *d)
+{
+   const struct ivrs_quirk_entry *i;
+
+   for (i = d->driver_data; i->id != 0 && i->devid != 0; i++)
   add_special_device(IVHD_SPECIAL_IOAPIC, i->id, (u16 *)&i->devid, 0);
+
+   return 0;
+}
+
+static const struct dmi_system_id ivrs_quirks[] __initconst = {
+   {
+   .callback = ivrs_ioapic_quirk_cb,
+   .ident = "Dell Inspiron 7375",
+   .matches = {
+   DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+   DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 7375"),
+   },
+   .driver_data = (void *)&ivrs_ioapic_quirks[DELL_INSPIRON_7375],
+   },
+   {
+   .callback = ivrs_ioapic_quirk_cb,
+   .ident = "Dell Latitude 5495",
+   .matches = {
+   DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+   DMI_MATCH(DMI_PROD
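
For reference, these quirks encode the same mappings users would otherwise
pass by hand on the kernel command line (values taken from the table comments
above), e.g. for the Dell Inspiron 7375:

	ivrs_ioapic[4]=00:14.0 ivrs_ioapic[5]=00:00.2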

Re: [PATCH V5 3/5] iommu/dma-iommu: Handle deferred devices

2019-08-16 Thread Hillf Danton


On Thu, 15 Aug 2019 12:09:41 +0100 Tom Murphy wrote:
> 
> Handle devices which defer their attach to the iommu in the dma-iommu api
> 
> Signed-off-by: Tom Murphy 
> ---
>  drivers/iommu/dma-iommu.c | 27 ++-
>  1 file changed, 26 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
> index 2712fbc68b28..906b7fa14d3c 100644
> --- a/drivers/iommu/dma-iommu.c
> +++ b/drivers/iommu/dma-iommu.c
> @@ -22,6 +22,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  struct iommu_dma_msi_page {
>   struct list_headlist;
> @@ -351,6 +352,21 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
>   return iova_reserve_iommu_regions(dev, domain);
>  }
>  
> +static int handle_deferred_device(struct device *dev,
> + struct iommu_domain *domain)
> +{
> + const struct iommu_ops *ops = domain->ops;
> +
> + if (!is_kdump_kernel())
> + return 0;
> +
> + if (unlikely(ops->is_attach_deferred &&
> + ops->is_attach_deferred(domain, dev)))
> + return iommu_attach_device(domain, dev);
> +
> + return 0;
> +}
> +
>  /**
>   * dma_info_to_prot - Translate DMA API directions and attributes to IOMMU 
> API
>   *page flags.
> @@ -463,6 +479,9 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
>   size_t iova_off = iova_offset(iovad, phys);
>   dma_addr_t iova;
>  
> + if (unlikely(handle_deferred_device(dev, domain)))
> + return DMA_MAPPING_ERROR;
> +
>   size = iova_align(iovad, size + iova_off);
>  
>   iova = iommu_dma_alloc_iova(domain, size, dma_get_mask(dev), dev);

iommu_map_atomic() is applied to __iommu_dma_map() in 2/5.
Can this still be called from atomic context, given the mutex_lock()
in iommu_attach_device()?
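
In other words, the concern is a sleeping lock reachable from a path that may
now run in atomic context, roughly (call chain shown for illustration):

	/*
	 * __iommu_dma_map()                <- may run in atomic context after 2/5
	 *   handle_deferred_device()
	 *     iommu_attach_device()
	 *       mutex_lock(&group->mutex)  <- may sleep
	 */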



[PATCH] iommu/omap: Use the correct type for SLAB_HWCACHE_ALIGN

2019-08-16 Thread Suman Anna via iommu
The macro SLAB_HWCACHE_ALIGN is of type slab_flags_t, but is currently
assigned in the OMAP IOMMU driver to an unsigned long variable. This
generates a sparse warning on the type check. Fix this by declaring the
flags variable with the correct type.

Signed-off-by: Suman Anna 
---
 drivers/iommu/omap-iommu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index 99a9ff3e7f71..8645e9b175a3 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -1818,7 +1818,7 @@ static const struct iommu_ops omap_iommu_ops = {
 static int __init omap_iommu_init(void)
 {
struct kmem_cache *p;
-   const unsigned long flags = SLAB_HWCACHE_ALIGN;
+   const slab_flags_t flags = SLAB_HWCACHE_ALIGN;
size_t align = 1 << 10; /* L2 pagetable alignement */
struct device_node *np;
int ret;
-- 
2.22.0
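
For context, slab_flags_t is a sparse "bitwise" type, so initializing a plain
integer from it is exactly what trips the checker; a minimal illustration
(typedef paraphrased from include/linux/types.h):

	typedef unsigned __bitwise slab_flags_t;

	/* sparse warns: incorrect type in initializer (different base types) */
	const unsigned long bad = SLAB_HWCACHE_ALIGN;

	/* clean */
	const slab_flags_t ok = SLAB_HWCACHE_ALIGN;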



Re: regression in ath10k dma allocation

2019-08-16 Thread Tobias Klausmann

Hi Nicolin,

On 17.08.19 00:25, Nicolin Chen wrote:

Hi Tobias

On Fri, Aug 16, 2019 at 10:16:45PM +0200, Tobias Klausmann wrote:

do you have CONFIG_DMA_CMA set in your config?  If not please make sure
you have this commit in your testing tree, and if the problem still
persists it would be a little odd and we'd have to dig deeper:

commit dd3dcede9fa0a0b661ac1f24843f4a1b1317fdb6
Author: Nicolin Chen 
Date:   Wed May 29 17:54:25 2019 -0700

  dma-contiguous: fix !CONFIG_DMA_CMA version of dma_{alloc, free}_contiguous()

yes CONFIG_DMA_CMA is set (=y, see attached config), the commit you mention
above is included, if you have any hints how to go forward, please let me
know!

For CONFIG_DMA_CMA=y, by judging the log with error code -12, I
feel this one should work for you. Would you please check if it
is included or try it out otherwise?

dma-contiguous: do not overwrite align in dma_alloc_contiguous()
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/?id=c6622a425acd1d2f3a443cd39b490a8777b622d7



Thanks for the hint, yet the commit is included and does not fix the problem!


Greetings,

Tobias



Re: [Xen-devel] [PATCH 07/11] swiotlb-xen: provide a single page-coherent.h header

2019-08-16 Thread Julien Grall

Hi,

On 8/16/19 2:00 PM, Christoph Hellwig wrote:

Merge the various page-coherent.h files into a single one that either
provides prototypes or stubs depending on the need for cache
maintenance.

For extra benefit, also include <xen/page-coherent.h> in the file
actually implementing the interfaces provided.

Signed-off-by: Christoph Hellwig 
---
  arch/arm/include/asm/xen/page-coherent.h   |  2 --
  arch/arm/xen/mm.c  |  1 +
  arch/arm64/include/asm/xen/page-coherent.h |  2 --
  arch/x86/include/asm/xen/page-coherent.h   | 22 --
  drivers/xen/swiotlb-xen.c  |  4 +---
  include/Kbuild |  2 +-
  include/xen/{arm => }/page-coherent.h  | 27 +++---


I am not sure I agree with this rename. The implementation of the
helpers is very Arm specific, as it assumes Dom0 is 1:1 mapped.

This was necessary due to the lack of an IOMMU on Arm platforms back then,
but it is now a pain to get rid of on newer platforms...

Cheers,

--
Julien Grall


Re: regression in ath10k dma allocation

2019-08-16 Thread Nicolin Chen
Hi Tobias

On Fri, Aug 16, 2019 at 10:16:45PM +0200, Tobias Klausmann wrote:
> > do you have CONFIG_DMA_CMA set in your config?  If not please make sure
> > you have this commit in your testing tree, and if the problem still
> > persists it would be a little odd and we'd have to dig deeper:
> > 
> > commit dd3dcede9fa0a0b661ac1f24843f4a1b1317fdb6
> > Author: Nicolin Chen 
> > Date:   Wed May 29 17:54:25 2019 -0700
> > 
> >  dma-contiguous: fix !CONFIG_DMA_CMA version of dma_{alloc, free}_contiguous()

> yes CONFIG_DMA_CMA is set (=y, see attached config), the commit you mention
> above is included, if you have any hints how to go forward, please let me
> know!

For CONFIG_DMA_CMA=y, by judging the log with error code -12, I
feel this one should work for you. Would you please check if it
is included or try it out otherwise?

dma-contiguous: do not overwrite align in dma_alloc_contiguous()
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/?id=c6622a425acd1d2f3a443cd39b490a8777b622d7

Thanks
Nicolin


Re: [Freedreno] [PATCH v3 0/2] iommu/arm-smmu: Split pagetable support

2019-08-16 Thread Jordan Crouse
On Fri, Aug 16, 2019 at 08:43:53PM +0100, Robin Murphy wrote:
> On 16/08/2019 19:12, Rob Clark wrote:
> >On Fri, Aug 16, 2019 at 9:58 AM Robin Murphy  wrote:
> >>
> >>Hi Jordan,
> >>
> >>On 15/08/2019 16:33, Jordan Crouse wrote:
> >>>On Wed, Aug 07, 2019 at 04:21:38PM -0600, Jordan Crouse wrote:
> (Sigh, resend. I freaked out my SMTP server)
> 
> This is part of an ongoing evolution for enabling split pagetable support 
> for
> arm-smmu. Previous versions can be found [1].
> 
> In the discussion for v2 Robin pointed out that this is a very Adreno 
> specific
> use case and that is exactly true. Not only do we want to configure and 
> use a
> pagetable in the TTBR1 space, we also want to configure the TTBR0 region 
> but
> not allocate a pagetable for it or touch it until the GPU hardware does 
> so. As
> much as I want it to be a generic concept it really isn't.
> 
> This revision leans into that idea. Most of the same io-pgtable code is 
> there
> but now it is wrapped as an Adreno GPU specific format that is selected 
> by the
> compatible string in the arm-smmu device.
> 
> Additionally, per Robin's suggestion we are skipping creating a TTBR0 
> pagetable
> to save on wasted memory.
> 
> This isn't as clean as I would like it to be but I think that this is a 
> better
> direction than trying to pretend that the generic format would work.
> 
> I'm tempting fate by posting this and then taking some time off, but I 
> wanted
> to try to kick off a conversation or at least get some flames so I can 
> try to
> refine this again next week. Please take a look and give some advice on 
> the
> direction.
> >>>
> >>>Will, Robin -
> >>>
> >>>Modulo the impl changes from Robin, do you think that using a dedicated
> >>>pagetable format is the right approach for supporting split pagetables for 
> >>>the
> >>>Adreno GPU?
> >>
> >>How many different Adreno drivers would benefit from sharing it?
> >
> >Hypothetically everything back to a3xx, so I *could* see usefulness of
> >this in qcom_iommu (or maybe even msm-iommu).  OTOH maybe with
> >"modularizing" arm-smmu we could re-combine qcom_iommu and arm-smmu.
> 
> Indeed, that's certainly something I'm planning to investigate as a future
> refactoring step.
> 
> >And as a practical matter, I'm not sure if anyone will get around to
> >backporting per-context pagetables as far back as a3xx.
> >
> >BR,
> >-R
> >
> >>The more I come back to this, the more I'm convinced that io-pgtable
> >>should focus on the heavy lifting of pagetable management - the code
> >>that nobody wants to have to write at all, let alone more than once -
> >>and any subtleties which aren't essential to that should be pushed back
> >>into whichever callers actually care. Consider that already, literally
> >>no caller actually uses an unmodified stage 1 TCR value as provided in
> >>the io_pgtable_cfg.
> >>
> >>I feel it would be most productive to elaborate further in the form of
> >>patches, so let me get right on that and try to bash something out
> >>before I go home tonight...
> 
> ...and now there's a rough WIP branch here:
> 
> http://linux-arm.org/git?p=linux-rm.git;a=shortlog;h=refs/heads/iommu/pgtable
> 
> I'll finish testing and polishing those patches at some point next week,
> probably, but hopefully they're sufficiently illustrative for the moment.

This looks great so far. I can see where the TTBR1 stuff would fit in and I like
it a lot. I'll try to have some patches ready when you are done polishing.

Jordan

> Robin.

-- 
The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project


Re: [PATCH v9 3/5] block: sort headers on blk-setting.c

2019-08-16 Thread Wolfram Sang
On Fri, Jul 26, 2019 at 05:31:14PM +0900, Yoshihiro Shimoda wrote:
> This patch sorts the headers in alphabetic order to ease
> the maintenance for this part.
> 
> Signed-off-by: Yoshihiro Shimoda 
> Reviewed-by: Wolfram Sang 
> Reviewed-by: Simon Horman 
> ---

Jens, can we have your ack for this patch so Christoph can take this
series via his tree (also for patch 4/5)?

Thanks,

   Wolfram

>  block/blk-settings.c | 12 ++--
>  1 file changed, 6 insertions(+), 6 deletions(-)
> 
> diff --git a/block/blk-settings.c b/block/blk-settings.c
> index 2ae348c..45f2c52 100644
> --- a/block/blk-settings.c
> +++ b/block/blk-settings.c
> @@ -2,16 +2,16 @@
>  /*
>   * Functions related to setting various queue properties from drivers
>   */
> -#include 
> -#include 
> -#include 
>  #include 
>  #include 
> -#include   /* for max_pfn/max_low_pfn */
>  #include 
> -#include 
> -#include 
>  #include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include  /* for max_pfn/max_low_pfn */
> +#include 
>  
>  #include "blk.h"
>  #include "blk-wbt.h"
> -- 
> 2.7.4
> 



Re: [PATCH v9 2/5] iommu/dma: Add a new dma_map_ops of get_merge_boundary()

2019-08-16 Thread Wolfram Sang
On Fri, Jul 26, 2019 at 05:31:13PM +0900, Yoshihiro Shimoda wrote:
> This patch adds a new dma_map_ops of get_merge_boundary() to
> expose the DMA merge boundary if the domain type is IOMMU_DOMAIN_DMA.
> 
> Signed-off-by: Yoshihiro Shimoda 
> Reviewed-by: Simon Horman 

Joerg, can we have your ack for this patch so Christoph can take this
series via his tree?

Thanks,

   Wolfram

> ---
>  drivers/iommu/dma-iommu.c | 11 +++
>  1 file changed, 11 insertions(+)
> 
> diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
> index a7f9c3e..2992ce4 100644
> --- a/drivers/iommu/dma-iommu.c
> +++ b/drivers/iommu/dma-iommu.c
> @@ -1085,6 +1085,16 @@ static int iommu_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
>   return ret;
>  }
>  
> +static unsigned long iommu_dma_get_merge_boundary(struct device *dev)
> +{
> + struct iommu_domain *domain = iommu_get_dma_domain(dev);
> +
> + if (domain->type != IOMMU_DOMAIN_DMA)
> + return 0;   /* can't merge */
> +
> + return (1UL << __ffs(domain->pgsize_bitmap)) - 1;
> +}
> +
>  static const struct dma_map_ops iommu_dma_ops = {
>   .alloc  = iommu_dma_alloc,
>   .free   = iommu_dma_free,
> @@ -1100,6 +1110,7 @@ static const struct dma_map_ops iommu_dma_ops = {
>   .sync_sg_for_device = iommu_dma_sync_sg_for_device,
>   .map_resource   = iommu_dma_map_resource,
>   .unmap_resource = iommu_dma_unmap_resource,
> + .get_merge_boundary = iommu_dma_get_merge_boundary,
>  };
>  
>  /*
> -- 
> 2.7.4
> 
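
As a concrete illustration of the new helper (not part of the patch): for a
domain whose smallest supported page size is 4 KiB, bit 12 is the lowest bit
set in pgsize_bitmap, so the helper returns:

	/* __ffs(pgsize_bitmap) == 12 for a 4 KiB minimum page size */
	unsigned long boundary = (1UL << 12) - 1;	/* 0xfff */

The caller can then hand this mask to the block layer as a virtual boundary
mask, so that segments are only merged when they stay within that granularity.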



Re: [Freedreno] [PATCH v3 0/2] iommu/arm-smmu: Split pagetable support

2019-08-16 Thread Robin Murphy

On 16/08/2019 19:12, Rob Clark wrote:

On Fri, Aug 16, 2019 at 9:58 AM Robin Murphy  wrote:


Hi Jordan,

On 15/08/2019 16:33, Jordan Crouse wrote:

On Wed, Aug 07, 2019 at 04:21:38PM -0600, Jordan Crouse wrote:

(Sigh, resend. I freaked out my SMTP server)

This is part of an ongoing evolution for enabling split pagetable support for
arm-smmu. Previous versions can be found [1].

In the discussion for v2 Robin pointed out that this is a very Adreno specific
use case and that is exactly true. Not only do we want to configure and use a
pagetable in the TTBR1 space, we also want to configure the TTBR0 region but
not allocate a pagetable for it or touch it until the GPU hardware does so. As
much as I want it to be a generic concept it really isn't.

This revision leans into that idea. Most of the same io-pgtable code is there
but now it is wrapped as an Adreno GPU specific format that is selected by the
compatible string in the arm-smmu device.

Additionally, per Robin's suggestion we are skipping creating a TTBR0 pagetable
to save on wasted memory.

This isn't as clean as I would like it to be but I think that this is a better
direction than trying to pretend that the generic format would work.

I'm tempting fate by posting this and then taking some time off, but I wanted
to try to kick off a conversation or at least get some flames so I can try to
refine this again next week. Please take a look and give some advice on the
direction.


Will, Robin -

Modulo the impl changes from Robin, do you think that using a dedicated
pagetable format is the right approach for supporting split pagetables for the
Adreno GPU?


How many different Adreno drivers would benefit from sharing it?


Hypothetically everything back to a3xx, so I *could* see usefulness of
this in qcom_iommu (or maybe even msm-iommu).  OTOH maybe with
"modularizing" arm-smmu we could re-combine qcom_iommu and arm-smmu.


Indeed, that's certainly something I'm planning to investigate as a 
future refactoring step.



And as a practical matter, I'm not sure if anyone will get around to
backporting per-context pagetables as far back as a3xx.

BR,
-R


The more I come back to this, the more I'm convinced that io-pgtable
should focus on the heavy lifting of pagetable management - the code
that nobody wants to have to write at all, let alone more than once -
and any subtleties which aren't essential to that should be pushed back
into whichever callers actually care. Consider that already, literally
no caller actually uses an unmodified stage 1 TCR value as provided in
the io_pgtable_cfg.

I feel it would be most productive to elaborate further in the form of
patches, so let me get right on that and try to bash something out
before I go home tonight...


...and now there's a rough WIP branch here:

http://linux-arm.org/git?p=linux-rm.git;a=shortlog;h=refs/heads/iommu/pgtable

I'll finish testing and polishing those patches at some point next week, 
probably, but hopefully they're sufficiently illustrative for the moment.


Robin.


Re: DMA-API: cacheline tracking ENOMEM, dma-debug disabled due to nouveau ?

2019-08-16 Thread Daniel Vetter
On Fri, Aug 16, 2019 at 4:31 PM Corentin Labbe
 wrote:
> On Wed, Aug 14, 2019 at 07:49:27PM +0200, Daniel Vetter wrote:
> > On Wed, Aug 14, 2019 at 04:50:33PM +0200, Corentin Labbe wrote:
> > > Hello
> > >
> > > Since many releases (at least since 4.19), I have hit the following error
> > > message:
> > > DMA-API: cacheline tracking ENOMEM, dma-debug disabled
> > >
> > > After hitting that, I tried to check who was creating so many DMA mappings
> > > and saw:
> > > cat /sys/kernel/debug/dma-api/dump | cut -d' ' -f2 | sort | uniq -c
> > >   6 ahci
> > > 257 e1000e
> > >   6 ehci-pci
> > >5891 nouveau
> > >  24 uhci_hcd
> > >
> > > Is it normal for nouveau to have such a high number of DMA mappings?
> >
> > Yeah seems perfectly fine for a gpu.
>
> Note that it never goes down, and when I terminate my X session it stays the
> same. So without any "real" GPU work, is it still normal to have so many
> active mappings?

Might just be the dma_alloc cache. It should go down under memory
pressure I think. Otherwise might also be a leak.

> For example, when doing some transfers, the ahci mapping count changes and
> then always goes down to 6.

gpu drivers tend to cache everything, all the time ...
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch


Re: [Freedreno] [PATCH v3 0/2] iommu/arm-smmu: Split pagetable support

2019-08-16 Thread Rob Clark
On Fri, Aug 16, 2019 at 9:58 AM Robin Murphy  wrote:
>
> Hi Jordan,
>
> On 15/08/2019 16:33, Jordan Crouse wrote:
> > On Wed, Aug 07, 2019 at 04:21:38PM -0600, Jordan Crouse wrote:
> >> (Sigh, resend. I freaked out my SMTP server)
> >>
> >> This is part of an ongoing evolution for enabling split pagetable support 
> >> for
> >> arm-smmu. Previous versions can be found [1].
> >>
> >> In the discussion for v2 Robin pointed out that this is a very Adreno 
> >> specific
> >> use case and that is exactly true. Not only do we want to configure and 
> >> use a
> >> pagetable in the TTBR1 space, we also want to configure the TTBR0 region 
> >> but
> >> not allocate a pagetable for it or touch it until the GPU hardware does 
> >> so. As
> >> much as I want it to be a generic concept it really isn't.
> >>
> >> This revision leans into that idea. Most of the same io-pgtable code is 
> >> there
> >> but now it is wrapped as an Adreno GPU specific format that is selected by 
> >> the
> >> compatible string in the arm-smmu device.
> >>
> >> Additionally, per Robin's suggestion we are skipping creating a TTBR0 
> >> pagetable
> >> to save on wasted memory.
> >>
> >> This isn't as clean as I would like it to be but I think that this is a 
> >> better
> >> direction than trying to pretend that the generic format would work.
> >>
> >> I'm tempting fate by posting this and then taking some time off, but I 
> >> wanted
> >> to try to kick off a conversation or at least get some flames so I can try 
> >> to
> >> refine this again next week. Please take a look and give some advice on the
> >> direction.
> >
> > Will, Robin -
> >
> > Modulo the impl changes from Robin, do you think that using a dedicated
> > pagetable format is the right approach for supporting split pagetables for 
> > the
> > Adreno GPU?
>
> How many different Adreno drivers would benefit from sharing it?

Hypothetically everything back to a3xx, so I *could* see usefulness of
this in qcom_iommu (or maybe even msm-iommu).  OTOH maybe with
"modularizing" arm-smmu we could re-combine qcom_iommu and arm-smmu.
And as a practical matter, I'm not sure if anyone will get around to
backporting per-context pagetables as far back as a3xx.

BR,
-R

> The more I come back to this, the more I'm convinced that io-pgtable
> should focus on the heavy lifting of pagetable management - the code
> that nobody wants to have to write at all, let alone more than once -
> and any subtleties which aren't essential to that should be pushed back
> into whichever callers actually care. Consider that already, literally
> no caller actually uses an unmodified stage 1 TCR value as provided in
> the io_pgtable_cfg.
>
> I feel it would be most productive to elaborate further in the form of
> patches, so let me get right on that and try to bash something out
> before I go home tonight...
>
> Robin.
>
> > If so, then is adding the changes to io-pgtable-arm.c possible for 5.4 and 
> > then
> > add the implementation specific code on top of Robin's stack later or do you
> > feel they should come as part of a package deal?
> >
> > Jordan
> >
> >> Jordan Crouse (2):
> >>iommu/io-pgtable-arm: Add support for ARM_ADRENO_GPU_LPAE io-pgtable
> >>  format
> >>iommu/arm-smmu: Add support for Adreno GPU pagetable formats
> >>
> >>   drivers/iommu/arm-smmu.c   |   8 +-
> >>   drivers/iommu/io-pgtable-arm.c | 214 
> >> ++---
> >>   drivers/iommu/io-pgtable.c |   1 +
> >>   include/linux/io-pgtable.h |   2 +
> >>   4 files changed, 209 insertions(+), 16 deletions(-)
> >>
> >> --
> >> 2.7.4
> >>


Re: [PATCH 6/6] arm64: document the choice of page attributes for pgprot_dmacoherent

2019-08-16 Thread Will Deacon
On Fri, Aug 16, 2019 at 07:59:42PM +0200, Christoph Hellwig wrote:
> On Fri, Aug 16, 2019 at 06:31:18PM +0100, Will Deacon wrote:
> > Mind if I tweak the second sentence to be:
> > 
> >   This is different from "Device-nGnR[nE]" memory which is intended for MMIO
> >   and thus forbids speculation, preserves access size, requires strict
> >   alignment and can also force write responses to come from the endpoint.
> > 
> > ? It's a small change, but it better fits with the arm64 terminology
> > ("strongly ordered" is no longer used in the architecture).
> > 
> > If you're happy with that, I can make the change and queue this patch
> > for 5.4.
> 
> I'm fine with the change, but you really need this series as base,
> as there is no pgprot_dmacoherent before the series.  So I think I'll
> have to queue it up if we want it for 5.4, and I'll need a few more
> reviews for the other patches in this series first.

Ah, I didn't think about the contextual stuff. In which case, with my
change in wording:

Acked-by: Will Deacon 

and feel free to route it with the rest.

Thanks,

Will


Re: [PATCH 6/6] arm64: document the choice of page attributes for pgprot_dmacoherent

2019-08-16 Thread Christoph Hellwig
On Fri, Aug 16, 2019 at 06:31:18PM +0100, Will Deacon wrote:
> Mind if I tweak the second sentence to be:
> 
>   This is different from "Device-nGnR[nE]" memory which is intended for MMIO
>   and thus forbids speculation, preserves access size, requires strict
>   alignment and can also force write responses to come from the endpoint.
> 
> ? It's a small change, but it better fits with the arm64 terminology
> ("strongly ordered" is no longer used in the architecture).
> 
> If you're happy with that, I can make the change and queue this patch
> for 5.4.

I'm fine with the change, but you really need this series as base,
as there is no pgprot_dmacoherent before the series.  So I think I'll
have to queue it up if we want it for 5.4, and I'll need a few more
reviews for the other patches in this series first.


Re: [PATCH 6/6] arm64: document the choice of page attributes for pgprot_dmacoherent

2019-08-16 Thread Mark Rutland
On Fri, Aug 16, 2019 at 06:31:18PM +0100, Will Deacon wrote:
> Hi Christoph,
> 
> Thanks for spinning this into a patch.
> 
> On Fri, Aug 16, 2019 at 09:07:54AM +0200, Christoph Hellwig wrote:
> > Based on an email from Will Deacon.
> > 
> > Signed-off-by: Christoph Hellwig 
> > ---
> >  arch/arm64/include/asm/pgtable.h | 8 
> >  1 file changed, 8 insertions(+)
> > 
> > diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
> > index 6700371227d1..6ff221d9a631 100644
> > --- a/arch/arm64/include/asm/pgtable.h
> > +++ b/arch/arm64/include/asm/pgtable.h
> > @@ -435,6 +435,14 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd)
> > 	__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN)
> >  #define pgprot_device(prot) \
> > 	__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRE) | PTE_PXN | PTE_UXN)
> > +/*
> > + * DMA allocations for non-coherent devices use what the Arm architecture calls
> > + * "Normal non-cacheable" memory, which permits speculation, unaligned accesses
> > + * and merging of writes.  This is different from "Strongly Ordered" memory
> > + * which is intended for MMIO and thus forbids speculation, preserves access
> > + * size, requires strict alignment and also forces write responses to come from
> > + * the endpoint.
> > + */
> 
> Mind if I tweak the second sentence to be:
> 
>   This is different from "Device-nGnR[nE]" memory which is intended for MMIO
>   and thus forbids speculation, preserves access size, requires strict
>   alignment and can also force write responses to come from the endpoint.
> 
> ? It's a small change, but it better fits with the arm64 terminology
> ("strongly ordered" is no longer used in the architecture).
> 
> If you're happy with that, I can make the change and queue this patch
> for 5.4.

FWIW, with that wording:

Acked-by: Mark Rutland 

Mark.


Re: [PATCH 6/6] arm64: document the choice of page attributes for pgprot_dmacoherent

2019-08-16 Thread Will Deacon
Hi Christoph,

Thanks for spinning this into a patch.

On Fri, Aug 16, 2019 at 09:07:54AM +0200, Christoph Hellwig wrote:
> Based on an email from Will Deacon.
> 
> Signed-off-by: Christoph Hellwig 
> ---
>  arch/arm64/include/asm/pgtable.h | 8 
>  1 file changed, 8 insertions(+)
> 
> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
> index 6700371227d1..6ff221d9a631 100644
> --- a/arch/arm64/include/asm/pgtable.h
> +++ b/arch/arm64/include/asm/pgtable.h
> @@ -435,6 +435,14 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd)
> 	__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN)
>  #define pgprot_device(prot) \
> 	__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRE) | PTE_PXN | PTE_UXN)
> +/*
> + * DMA allocations for non-coherent devices use what the Arm architecture calls
> + * "Normal non-cacheable" memory, which permits speculation, unaligned accesses
> + * and merging of writes.  This is different from "Strongly Ordered" memory
> + * which is intended for MMIO and thus forbids speculation, preserves access
> + * size, requires strict alignment and also forces write responses to come from
> + * the endpoint.
> + */

Mind if I tweak the second sentence to be:

  This is different from "Device-nGnR[nE]" memory which is intended for MMIO
  and thus forbids speculation, preserves access size, requires strict
  alignment and can also force write responses to come from the endpoint.

? It's a small change, but it better fits with the arm64 terminology
("strongly ordered" is no longer used in the architecture).

If you're happy with that, I can make the change and queue this patch
for 5.4.

Thanks,

Will
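
For context, the macro this comment documents (pgprot_dmacoherent, added
earlier in the series; definition reproduced here for illustration) selects
Normal-NC attributes rather than Device memory:

	#define pgprot_dmacoherent(prot) \
		__pgprot_modify(prot, PTE_ATTRINDX_MASK, \
				PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN)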


Re: [Freedreno] [PATCH v3 0/2] iommu/arm-smmu: Split pagetable support

2019-08-16 Thread Robin Murphy

Hi Jordan,

On 15/08/2019 16:33, Jordan Crouse wrote:

On Wed, Aug 07, 2019 at 04:21:38PM -0600, Jordan Crouse wrote:

(Sigh, resend. I freaked out my SMTP server)

This is part of an ongoing evolution for enabling split pagetable support for
arm-smmu. Previous versions can be found [1].

In the discussion for v2 Robin pointed out that this is a very Adreno specific
use case and that is exactly true. Not only do we want to configure and use a
pagetable in the TTBR1 space, we also want to configure the TTBR0 region but
not allocate a pagetable for it or touch it until the GPU hardware does so. As
much as I want it to be a generic concept it really isn't.

This revision leans into that idea. Most of the same io-pgtable code is there
but now it is wrapped as an Adreno GPU specific format that is selected by the
compatible string in the arm-smmu device.

Additionally, per Robin's suggestion we are skipping creating a TTBR0 pagetable
to save on wasted memory.

This isn't as clean as I would like it to be but I think that this is a better
direction than trying to pretend that the generic format would work.

I'm tempting fate by posting this and then taking some time off, but I wanted
to try to kick off a conversation or at least get some flames so I can try to
refine this again next week. Please take a look and give some advice on the
direction.


Will, Robin -

Modulo the impl changes from Robin, do you think that using a dedicated
pagetable format is the right approach for supporting split pagetables for the
Adreno GPU?


How many different Adreno drivers would benefit from sharing it?

The more I come back to this, the more I'm convinced that io-pgtable 
should focus on the heavy lifting of pagetable management - the code 
that nobody wants to have to write at all, let alone more than once - 
and any subtleties which aren't essential to that should be pushed back 
into whichever callers actually care. Consider that already, literally 
no caller actually uses an unmodified stage 1 TCR value as provided in 
the io_pgtable_cfg.


I feel it would be most productive to elaborate further in the form of 
patches, so let me get right on that and try to bash something out 
before I go home tonight...


Robin.


If so, then is adding the changes to io-pgtable-arm.c possible for 5.4 and then
add the implementation specific code on top of Robin's stack later or do you
feel they should come as part of a package deal?

Jordan


Jordan Crouse (2):
   iommu/io-pgtable-arm: Add support for ARM_ADRENO_GPU_LPAE io-pgtable
 format
   iommu/arm-smmu: Add support for Adreno GPU pagetable formats

  drivers/iommu/arm-smmu.c   |   8 +-
  drivers/iommu/io-pgtable-arm.c | 214 ++---
  drivers/iommu/io-pgtable.c |   1 +
  include/linux/io-pgtable.h |   2 +
  4 files changed, 209 insertions(+), 16 deletions(-)

--
2.7.4



Re: [PATCH 03/11] xen/arm: pass one less argument to dma_cache_maint

2019-08-16 Thread Christoph Hellwig
On Fri, Aug 16, 2019 at 02:37:58PM +0100, Robin Murphy wrote:
> On 16/08/2019 14:00, Christoph Hellwig wrote:
>> Instead of taking apart the dma address in both callers do it inside
>> dma_cache_maint itself.
>>
>> Signed-off-by: Christoph Hellwig 
>> ---
>>   arch/arm/xen/mm.c | 10 ++
>>   1 file changed, 6 insertions(+), 4 deletions(-)
>>
>> diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
>> index 90574d89d0d4..d9da24fda2f7 100644
>> --- a/arch/arm/xen/mm.c
>> +++ b/arch/arm/xen/mm.c
>> @@ -43,13 +43,15 @@ static bool hypercall_cflush = false;
>> /* functions called by SWIOTLB */
>>   -static void dma_cache_maint(dma_addr_t handle, unsigned long offset,
>> -size_t size, enum dma_data_direction dir, enum dma_cache_op op)
>> +static void dma_cache_maint(dma_addr_t handle, size_t size,
>> +enum dma_data_direction dir, enum dma_cache_op op)
>>   {
>>  struct gnttab_cache_flush cflush;
>>  unsigned long xen_pfn;
>> +unsigned long offset = handle & ~PAGE_MASK;
>>  size_t left = size;
>>   +  offset &= PAGE_MASK;
>
> Ahem... presumably that should be handle, not offset.

Ooops, yes.
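
In other words, the intended version is presumably (a sketch based on Robin's
comment, not a posted fix):

	unsigned long offset = handle & ~PAGE_MASK;
	size_t left = size;

	handle &= PAGE_MASK;	/* was: offset &= PAGE_MASK; which always zeroes offset */
	xen_pfn = (handle >> XEN_PAGE_SHIFT) + offset / XEN_PAGE_SIZE;
	offset %= XEN_PAGE_SIZE;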


Re: regression in ath10k dma allocation

2019-08-16 Thread Christoph Hellwig
Hi Tobias,

do you have CONFIG_DMA_CMA set in your config?  If not please make sure
you have this commit in your testing tree, and if the problem still
persists it would be a little odd and we'd have to dig deeper:

commit dd3dcede9fa0a0b661ac1f24843f4a1b1317fdb6
Author: Nicolin Chen 
Date:   Wed May 29 17:54:25 2019 -0700

dma-contiguous: fix !CONFIG_DMA_CMA version of dma_{alloc, free}_contiguous()



regression in ath10k dma allocation

2019-08-16 Thread Tobias Klausmann

Hello all,

Within the current development cycle I noticed the ath10k driver failing
to set up:


[    3.185660] ath10k_pci :02:00.0: failed to alloc CE dest ring 1: -12
[    3.185664] ath10k_pci :02:00.0: failed to allocate copy engine 
pipe 1: -12
[    3.185667] ath10k_pci :02:00.0: failed to allocate copy engine 
pipes: -12

[    3.185669] ath10k_pci :02:00.0: failed to setup resource: -12
[    3.185692] ath10k_pci: probe of :02:00.0 failed with error -12

The actual failure comes from [1], and indeed bisecting brought me to a
related commit, "dma-contiguous: add dma_{alloc,free}_contiguous()
helpers" [2]. Reverting the commit fixes the problem, yet this might
just be the driver abusing the dma infrastructure, so hopefully someone
can have a look at it, as I'm not familiar with the code!



[1]: 
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/net/wireless/ath/ath10k/ce.c?h=v5.3-rc4#n1650


[2]: 
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=b1d2dc009dece4cd7e629419b52266ba51960a6b



Greetings,

Tobias


Re: [PATCH v3 hmm 00/11] Add mmu_notifier_get/put for managing mmu notifier registrations

2019-08-16 Thread Jason Gunthorpe
On Tue, Aug 06, 2019 at 08:15:37PM -0300, Jason Gunthorpe wrote:
> This series is already entangled with patches in the hmm & RDMA tree and
> will require some git topic branches for the RDMA ODP stuff. I intend for
> it to go through the hmm tree.

> Jason Gunthorpe (11):
>   mm/mmu_notifiers: hoist do_mmu_notifier_register down_write to the
> caller
>   mm/mmu_notifiers: do not speculatively allocate a mmu_notifier_mm
>   mm/mmu_notifiers: add a get/put scheme for the registration
>   misc/sgi-gru: use mmu_notifier_get/put for struct gru_mm_struct
>   hmm: use mmu_notifier_get/put for 'struct hmm'
>   drm/radeon: use mmu_notifier_get/put for struct radeon_mn
>   drm/amdkfd: fix a use after free race with mmu_notifer unregister
>   drm/amdkfd: use mmu_notifier_put

Other than these patches:

>   RDMA/odp: use mmu_notifier_get/put for 'struct ib_ucontext_per_mm'
>   RDMA/odp: remove ib_ucontext from ib_umem
>   mm/mmu_notifiers: remove unregister_no_release

This series has been applied.

I will apply the ODP patches when the series they depend on is merged
to the RDMA tree

Any further acks/remarks I will annotate, thanks in advance

Thanks to all reviewers,
Jason


Re: DMA-API: cacheline tracking ENOMEM, dma-debug disabled due to nouveau ?

2019-08-16 Thread Corentin Labbe
On Wed, Aug 14, 2019 at 07:49:27PM +0200, Daniel Vetter wrote:
> On Wed, Aug 14, 2019 at 04:50:33PM +0200, Corentin Labbe wrote:
> > Hello
> > 
> > Since many releases (at least since 4.19), I have hit the following error
> > message:
> > DMA-API: cacheline tracking ENOMEM, dma-debug disabled
> > 
> > After hitting that, I tried to check who was creating so many DMA mappings
> > and saw:
> > cat /sys/kernel/debug/dma-api/dump | cut -d' ' -f2 | sort | uniq -c
> >   6 ahci
> > 257 e1000e
> >   6 ehci-pci
> >5891 nouveau
> >  24 uhci_hcd
> > 
> > Is it normal for nouveau to have such a high number of DMA mappings?
> 
> Yeah seems perfectly fine for a gpu.

Note that it never goes down, and when I terminate my X session it stays the same.
So without any "real" GPU work, is it still normal to have so many active
mappings?

For example, when doing some transfers, the ahci mapping count changes and then
always goes down to 6.


Re: [PATCH 03/11] xen/arm: pass one less argument to dma_cache_maint

2019-08-16 Thread Robin Murphy

On 16/08/2019 14:00, Christoph Hellwig wrote:

Instead of taking apart the dma address in both callers do it inside
dma_cache_maint itself.

Signed-off-by: Christoph Hellwig 
---
  arch/arm/xen/mm.c | 10 ++
  1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index 90574d89d0d4..d9da24fda2f7 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -43,13 +43,15 @@ static bool hypercall_cflush = false;
  
  /* functions called by SWIOTLB */
  
-static void dma_cache_maint(dma_addr_t handle, unsigned long offset,

-   size_t size, enum dma_data_direction dir, enum dma_cache_op op)
+static void dma_cache_maint(dma_addr_t handle, size_t size,
+   enum dma_data_direction dir, enum dma_cache_op op)
  {
struct gnttab_cache_flush cflush;
unsigned long xen_pfn;
+   unsigned long offset = handle & ~PAGE_MASK;
size_t left = size;
  
+	offset &= PAGE_MASK;


Ahem... presumably that should be handle, not offset.

Robin.


xen_pfn = (handle >> XEN_PAGE_SHIFT) + offset / XEN_PAGE_SIZE;
offset %= XEN_PAGE_SIZE;
  
@@ -86,13 +88,13 @@ static void dma_cache_maint(dma_addr_t handle, unsigned long offset,

  static void __xen_dma_page_dev_to_cpu(struct device *hwdev, dma_addr_t handle,
size_t size, enum dma_data_direction dir)
  {
-   dma_cache_maint(handle & PAGE_MASK, handle & ~PAGE_MASK, size, dir, DMA_UNMAP);
+   dma_cache_maint(handle, size, dir, DMA_UNMAP);
  }
  
  static void __xen_dma_page_cpu_to_dev(struct device *hwdev, dma_addr_t handle,

size_t size, enum dma_data_direction dir)
  {
-   dma_cache_maint(handle & PAGE_MASK, handle & ~PAGE_MASK, size, dir, DMA_MAP);
+   dma_cache_maint(handle, size, dir, DMA_MAP);
  }
  
  void __xen_dma_map_page(struct device *hwdev, struct page *page,




[PATCH 08/11] swiotlb-xen: use the same foreign page check everywhere

2019-08-16 Thread Christoph Hellwig
xen_dma_map_page uses a different and more complicated check for
foreign pages than the other three cache maintenance helpers.
Switch it to the simpler pfn_valid method as well.

Signed-off-by: Christoph Hellwig 
---
 include/xen/page-coherent.h | 9 ++---
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/include/xen/page-coherent.h b/include/xen/page-coherent.h
index 7c32944de051..0f4d468e7a89 100644
--- a/include/xen/page-coherent.h
+++ b/include/xen/page-coherent.h
@@ -43,14 +43,9 @@ static inline void xen_dma_map_page(struct device *hwdev, struct page *page,
 dma_addr_t dev_addr, unsigned long offset, size_t size,
 enum dma_data_direction dir, unsigned long attrs)
 {
-   unsigned long page_pfn = page_to_xen_pfn(page);
-   unsigned long dev_pfn = XEN_PFN_DOWN(dev_addr);
-   unsigned long compound_pages =
-   (1
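
Following the shape Christoph describes at the top of this digest (pfn_valid()
selecting between a local cache sync and the arch code), the contrast is
roughly the following, with the removed check reconstructed for illustration:

	/* old check, specific to xen_dma_map_page: */
	bool local = (page_pfn <= dev_pfn) &&
		     (dev_pfn - page_pfn < compound_pages);

	/* new check, the same simple test the other helpers use: */
	if (pfn_valid(PFN_DOWN(dev_addr)))
		; /* local page */
	else
		__xen_dma_map_page(hwdev, page, dev_addr, offset, size, dir, attrs);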

[PATCH 11/11] arm64: use asm-generic/dma-mapping.h

2019-08-16 Thread Christoph Hellwig
Now that the Xen special cases are gone, nothing worth mentioning is
left in the arm64 <asm/dma-mapping.h> file, so switch to using the
asm-generic version instead.

Signed-off-by: Christoph Hellwig 
---
 arch/arm64/include/asm/Kbuild|  1 +
 arch/arm64/include/asm/dma-mapping.h | 22 --
 arch/arm64/mm/dma-mapping.c  |  1 +
 3 files changed, 2 insertions(+), 22 deletions(-)
 delete mode 100644 arch/arm64/include/asm/dma-mapping.h

diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index c52e151afab0..98a5405c8558 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -4,6 +4,7 @@ generic-y += delay.h
 generic-y += div64.h
 generic-y += dma.h
 generic-y += dma-contiguous.h
+generic-y += dma-mapping.h
 generic-y += early_ioremap.h
 generic-y += emergency-restart.h
 generic-y += hw_irq.h
diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h
deleted file mode 100644
index 67243255a858..
--- a/arch/arm64/include/asm/dma-mapping.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2012 ARM Ltd.
- */
-#ifndef __ASM_DMA_MAPPING_H
-#define __ASM_DMA_MAPPING_H
-
-#ifdef __KERNEL__
-
-#include 
-#include 
-
-#include 
-#include 
-
-static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
-{
-   return NULL;
-}
-
-#endif /* __KERNEL__ */
-#endif /* __ASM_DMA_MAPPING_H */
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 4b244a037349..6578abcfbbc7 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -8,6 +8,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include 
-- 
2.20.1



[PATCH 10/11] swiotlb-xen: merge xen_unmap_single into xen_swiotlb_unmap_page

2019-08-16 Thread Christoph Hellwig
No need for a no-op wrapper.

Signed-off-by: Christoph Hellwig 
---
 drivers/xen/swiotlb-xen.c | 15 ---
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index c3c383033ae4..b6b9c4c1b397 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -414,9 +414,8 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
  * After this call, reads by the cpu to the buffer are guaranteed to see
  * whatever the device wrote there.
  */
-static void xen_unmap_single(struct device *hwdev, dma_addr_t dev_addr,
-size_t size, enum dma_data_direction dir,
-unsigned long attrs)
+static void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
+   size_t size, enum dma_data_direction dir, unsigned long attrs)
 {
phys_addr_t paddr = xen_bus_to_phys(dev_addr);
 
@@ -430,13 +429,6 @@ static void xen_unmap_single(struct device *hwdev, dma_addr_t dev_addr,
swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs);
 }
 
-static void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
-   size_t size, enum dma_data_direction dir,
-   unsigned long attrs)
-{
-   xen_unmap_single(hwdev, dev_addr, size, dir, attrs);
-}
-
 static void
 xen_swiotlb_sync_single_for_cpu(struct device *dev, dma_addr_t dma_addr,
size_t size, enum dma_data_direction dir)
@@ -477,7 +469,8 @@ xen_swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
BUG_ON(dir == DMA_NONE);
 
for_each_sg(sgl, sg, nelems, i)
-   xen_unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir, attrs);
+   xen_swiotlb_unmap_page(hwdev, sg->dma_address, sg_dma_len(sg),
+   dir, attrs);
 
 }
 
-- 
2.20.1



[PATCH 07/11] swiotlb-xen: provide a single page-coherent.h header

2019-08-16 Thread Christoph Hellwig
Merge the various page-coherent.h files into a single one that either
provides prototypes or stubs depending on the need for cache
maintenance.

For extra benefit, also include <xen/page-coherent.h> in the file
actually implementing the interfaces provided.

Signed-off-by: Christoph Hellwig 
---
 arch/arm/include/asm/xen/page-coherent.h   |  2 --
 arch/arm/xen/mm.c  |  1 +
 arch/arm64/include/asm/xen/page-coherent.h |  2 --
 arch/x86/include/asm/xen/page-coherent.h   | 22 --
 drivers/xen/swiotlb-xen.c  |  4 +---
 include/Kbuild |  2 +-
 include/xen/{arm => }/page-coherent.h  | 27 +++---
 7 files changed, 27 insertions(+), 33 deletions(-)
 delete mode 100644 arch/arm/include/asm/xen/page-coherent.h
 delete mode 100644 arch/arm64/include/asm/xen/page-coherent.h
 delete mode 100644 arch/x86/include/asm/xen/page-coherent.h
 rename include/xen/{arm => }/page-coherent.h (76%)

diff --git a/arch/arm/include/asm/xen/page-coherent.h b/arch/arm/include/asm/xen/page-coherent.h
deleted file mode 100644
index 27e984977402..
--- a/arch/arm/include/asm/xen/page-coherent.h
+++ /dev/null
@@ -1,2 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#include 
diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index a59980f1aa54..85482cdda1e5 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -15,6 +15,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include 
diff --git a/arch/arm64/include/asm/xen/page-coherent.h b/arch/arm64/include/asm/xen/page-coherent.h
deleted file mode 100644
index 27e984977402..
--- a/arch/arm64/include/asm/xen/page-coherent.h
+++ /dev/null
@@ -1,2 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#include 
diff --git a/arch/x86/include/asm/xen/page-coherent.h b/arch/x86/include/asm/xen/page-coherent.h
deleted file mode 100644
index 8ee33c5edded..
--- a/arch/x86/include/asm/xen/page-coherent.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_XEN_PAGE_COHERENT_H
-#define _ASM_X86_XEN_PAGE_COHERENT_H
-
-#include 
-#include 
-
-static inline void xen_dma_map_page(struct device *hwdev, struct page *page,
-dma_addr_t dev_addr, unsigned long offset, size_t size,
-enum dma_data_direction dir, unsigned long attrs) { }
-
-static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
-   size_t size, enum dma_data_direction dir,
-   unsigned long attrs) { }
-
-static inline void xen_dma_sync_single_for_cpu(struct device *hwdev,
-   dma_addr_t handle, size_t size, enum dma_data_direction dir) { }
-
-static inline void xen_dma_sync_single_for_device(struct device *hwdev,
-   dma_addr_t handle, size_t size, enum dma_data_direction dir) { }
-
-#endif /* _ASM_X86_XEN_PAGE_COHERENT_H */
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index f9dd4cb6e4b3..7b23929854e7 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -31,12 +31,10 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
-#include 
-#include 
-
 #include 
 /*
  * Used to do a quick range check in swiotlb_tbl_unmap_single and
diff --git a/include/Kbuild b/include/Kbuild
index c38f0d46b267..e2ae52ef9e1e 100644
--- a/include/Kbuild
+++ b/include/Kbuild
@@ -1189,7 +1189,6 @@ header-test-  += video/vga.h
 header-test-   += video/w100fb.h
 header-test-   += xen/acpi.h
 header-test-   += xen/arm/hypercall.h
-header-test-   += xen/arm/page-coherent.h
 header-test-   += xen/arm/page.h
 header-test-   += xen/balloon.h
 header-test-   += xen/events.h
@@ -1231,6 +1230,7 @@ header-test-  += xen/interface/xen.h
 header-test-   += xen/interface/xenpmu.h
 header-test-   += xen/mem-reservation.h
 header-test-   += xen/page.h
+header-test-   += xen/page-coherent.h
 header-test-   += xen/platform_pci.h
 header-test-   += xen/swiotlb-xen.h
 header-test-   += xen/xen-front-pgdir-shbuf.h
diff --git a/include/xen/arm/page-coherent.h b/include/xen/page-coherent.h
similarity index 76%
rename from include/xen/arm/page-coherent.h
rename to include/xen/page-coherent.h
index 4294a31305ca..7c32944de051 100644
--- a/include/xen/arm/page-coherent.h
+++ b/include/xen/page-coherent.h
@@ -1,10 +1,12 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _XEN_ARM_PAGE_COHERENT_H
-#define _XEN_ARM_PAGE_COHERENT_H
+#ifndef _XEN_PAGE_COHERENT_H
+#define _XEN_PAGE_COHERENT_H
 
 #include 
 #include 
 
+#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \
+defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU)
 void __xen_dma_map_page(struct device *hwdev, struct page *page,
 dma_addr_t dev_addr, unsigned long offse

[PATCH 06/11] swiotlb-xen: always use dma-direct helpers to alloc coherent pages

2019-08-16 Thread Christoph Hellwig
x86 currently calls alloc_pages, but using dma-direct works as well
there, with the added benefit of using the CMA pool if available.
The biggest advantage is of course to remove a pointless bit of
architecture specific code.

Signed-off-by: Christoph Hellwig 
---
 arch/x86/include/asm/xen/page-coherent.h | 16 
 drivers/xen/swiotlb-xen.c|  7 +++
 include/xen/arm/page-coherent.h  | 12 
 3 files changed, 3 insertions(+), 32 deletions(-)

diff --git a/arch/x86/include/asm/xen/page-coherent.h b/arch/x86/include/asm/xen/page-coherent.h
index 116777e7f387..8ee33c5edded 100644
--- a/arch/x86/include/asm/xen/page-coherent.h
+++ b/arch/x86/include/asm/xen/page-coherent.h
@@ -5,22 +5,6 @@
 #include 
 #include 
 
-static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size,
-   dma_addr_t *dma_handle, gfp_t flags,
-   unsigned long attrs)
-{
-   void *vstart = (void*)__get_free_pages(flags, get_order(size));
-   *dma_handle = virt_to_phys(vstart);
-   return vstart;
-}
-
-static inline void xen_free_coherent_pages(struct device *hwdev, size_t size,
-   void *cpu_addr, dma_addr_t dma_handle,
-   unsigned long attrs)
-{
-   free_pages((unsigned long) cpu_addr, get_order(size));
-}
-
 static inline void xen_dma_map_page(struct device *hwdev, struct page *page,
 dma_addr_t dev_addr, unsigned long offset, size_t size,
 enum dma_data_direction dir, unsigned long attrs) { }
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index b8808677ae1d..f9dd4cb6e4b3 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -299,8 +299,7 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 * address. In fact on ARM virt_to_phys only works for kernel direct
 * mapped RAM memory. Also see comment below.
 */
-   ret = xen_alloc_coherent_pages(hwdev, size, dma_handle, flags, attrs);
-
+   ret = dma_direct_alloc(hwdev, size, dma_handle, flags, attrs);
if (!ret)
return ret;
 
@@ -319,7 +318,7 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
else {
if (xen_create_contiguous_region(phys, order,
 fls64(dma_mask), dma_handle) != 0) {
-   xen_free_coherent_pages(hwdev, size, ret, (dma_addr_t)phys, attrs);
+   dma_direct_free(hwdev, size, ret, (dma_addr_t)phys, attrs);
return NULL;
}
SetPageXenRemapped(virt_to_page(ret));
@@ -351,7 +350,7 @@ xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
TestClearPageXenRemapped(virt_to_page(vaddr)))
xen_destroy_contiguous_region(phys, order);
 
-   xen_free_coherent_pages(hwdev, size, vaddr, (dma_addr_t)phys, attrs);
+   dma_direct_free(hwdev, size, vaddr, (dma_addr_t)phys, attrs);
 }
 
 /*
diff --git a/include/xen/arm/page-coherent.h b/include/xen/arm/page-coherent.h
index da2cc09c8eda..4294a31305ca 100644
--- a/include/xen/arm/page-coherent.h
+++ b/include/xen/arm/page-coherent.h
@@ -16,18 +16,6 @@ void __xen_dma_sync_single_for_cpu(struct device *hwdev,
 void __xen_dma_sync_single_for_device(struct device *hwdev,
dma_addr_t handle, size_t size, enum dma_data_direction dir);
 
-static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size,
-   dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs)
-{
-   return dma_direct_alloc(hwdev, size, dma_handle, flags, attrs);
-}
-
-static inline void xen_free_coherent_pages(struct device *hwdev, size_t size,
-   void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs)
-{
-   dma_direct_free(hwdev, size, cpu_addr, dma_handle, attrs);
-}
-
 static inline void xen_dma_sync_single_for_cpu(struct device *hwdev,
dma_addr_t handle, size_t size, enum dma_data_direction dir)
 {
-- 
2.20.1



[PATCH 01/11] xen/arm: use dma-noncoherent.h calls for xen-swiotlb cache maintenance

2019-08-16 Thread Christoph Hellwig
Reuse the arm64 code that uses the dma-direct/swiotlb helpers for DMA
non-coherent devices.
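
For reference, the consolidated helpers end up with this shape
(reconstructed sketch; see the diff below for the real changes):

    static inline void xen_dma_sync_single_for_cpu(struct device *hwdev,
            dma_addr_t handle, size_t size, enum dma_data_direction dir)
    {
        unsigned long pfn = PFN_DOWN(handle);

        if (pfn_valid(pfn))
            /* local page: the dma-direct/swiotlb helper is enough */
            dma_direct_sync_single_for_cpu(hwdev, handle, size, dir);
        else
            /* foreign page: needs the Xen cache flush hypercall */
            __xen_dma_sync_single_for_cpu(hwdev, handle, size, dir);
    }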

Signed-off-by: Christoph Hellwig 
---
 arch/arm/Kconfig   |  4 +
 arch/arm/include/asm/device.h  |  3 -
 arch/arm/include/asm/xen/page-coherent.h   | 93 --
 arch/arm/mm/Kconfig|  4 -
 arch/arm/mm/dma-mapping.c  |  8 +-
 arch/arm64/include/asm/xen/page-coherent.h | 75 -
 drivers/xen/swiotlb-xen.c  | 49 +---
 include/xen/arm/page-coherent.h| 71 +
 8 files changed, 78 insertions(+), 229 deletions(-)

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 33b00579beff..24360211534a 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -7,6 +7,8 @@ config ARM
select ARCH_HAS_BINFMT_FLAT
select ARCH_HAS_DEBUG_VIRTUAL if MMU
select ARCH_HAS_DEVMEM_IS_ALLOWED
+   select ARCH_HAS_DMA_COHERENT_TO_PFN if SWIOTLB
+   select ARCH_HAS_DMA_MMAP_PGPROT if SWIOTLB
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_KEEPINITRD
@@ -18,6 +20,8 @@ config ARM
select ARCH_HAS_SET_MEMORY
select ARCH_HAS_STRICT_KERNEL_RWX if MMU && !XIP_KERNEL
select ARCH_HAS_STRICT_MODULE_RWX if MMU
+   select ARCH_HAS_SYNC_DMA_FOR_DEVICE if SWIOTLB
+   select ARCH_HAS_SYNC_DMA_FOR_CPU if SWIOTLB
select ARCH_HAS_TEARDOWN_DMA_OPS if MMU
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_HAVE_CUSTOM_GPIO_H
diff --git a/arch/arm/include/asm/device.h b/arch/arm/include/asm/device.h
index f6955b55c544..c675bc0d5aa8 100644
--- a/arch/arm/include/asm/device.h
+++ b/arch/arm/include/asm/device.h
@@ -14,9 +14,6 @@ struct dev_archdata {
 #endif
 #ifdef CONFIG_ARM_DMA_USE_IOMMU
struct dma_iommu_mapping*mapping;
-#endif
-#ifdef CONFIG_XEN
-   const struct dma_map_ops *dev_dma_ops;
 #endif
unsigned int dma_coherent:1;
unsigned int dma_ops_setup:1;
diff --git a/arch/arm/include/asm/xen/page-coherent.h b/arch/arm/include/asm/xen/page-coherent.h
index 2c403e7c782d..27e984977402 100644
--- a/arch/arm/include/asm/xen/page-coherent.h
+++ b/arch/arm/include/asm/xen/page-coherent.h
@@ -1,95 +1,2 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_ARM_XEN_PAGE_COHERENT_H
-#define _ASM_ARM_XEN_PAGE_COHERENT_H
-
-#include 
-#include 
 #include 
-
-static inline const struct dma_map_ops *xen_get_dma_ops(struct device *dev)
-{
-   if (dev && dev->archdata.dev_dma_ops)
-   return dev->archdata.dev_dma_ops;
-   return get_arch_dma_ops(NULL);
-}
-
-static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size,
-   dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs)
-{
-   return xen_get_dma_ops(hwdev)->alloc(hwdev, size, dma_handle, flags, attrs);
-}
-
-static inline void xen_free_coherent_pages(struct device *hwdev, size_t size,
-   void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs)
-{
-   xen_get_dma_ops(hwdev)->free(hwdev, size, cpu_addr, dma_handle, attrs);
-}
-
-static inline void xen_dma_map_page(struct device *hwdev, struct page *page,
-dma_addr_t dev_addr, unsigned long offset, size_t size,
-enum dma_data_direction dir, unsigned long attrs)
-{
-   unsigned long page_pfn = page_to_xen_pfn(page);
-   unsigned long dev_pfn = XEN_PFN_DOWN(dev_addr);
-   unsigned long compound_pages =
-   (1 << compound_order(page)) * XEN_PFN_PER_PAGE;
-   bool local = (page_pfn <= dev_pfn) &&
-   (dev_pfn - page_pfn < compound_pages);
-
-   if (local)
-   xen_get_dma_ops(hwdev)->map_page(hwdev, page, offset, size,
-   dir, attrs);
-   else
-   __xen_dma_map_page(hwdev, page, dev_addr, offset, size,
-   dir, attrs);
-}
-
-static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
-   size_t size, enum dma_data_direction dir, unsigned long attrs)
-{
-   unsigned long pfn = PFN_DOWN(handle);
-
-   if (pfn_valid(pfn)) {
-   if (xen_get_dma_ops(hwdev)->unmap_page)
-   xen_get_dma_ops(hwdev)->unmap_page(hwdev, handle, size,
-   dir, attrs);
-   } else
-   __xen_dma_unmap_page(hwdev, handle, size, dir, attrs);
-}
-
-static inline void xen_dma_sync_single_for_cpu(struct device *hwdev,
-   dma_addr_t handle, size_t size, enum dma_data_direction dir)
-{
-   unsigned long pfn = PFN_DOWN(handle);
-   if (pfn_valid(pfn)) {
-   if (xen_get_dma_ops(hwdev)->sync_single_for_cpu)
-   xen_get_dma_ops(hwdev)->sync_single_for_cpu(hwdev, handle, size, dir);
-   } else
-   __xen_dma_sync_single_for_cpu(hwdev, handle, size, dir);
-}

[PATCH 09/11] swiotlb-xen: simplify cache maintenance

2019-08-16 Thread Christoph Hellwig
Now that we know we always have the dma-noncoherent.h helpers available
if we are on an architecture with support for non-coherent devices,
we can just call them directly, and remove the calls to the dma-direct
routines, including the odd pattern of calling the dma_direct_map_page
routines but ignoring their return value.  Instead we now have
Xen wrappers for the arch_sync_dma_for_{device,cpu} helpers that call
the special Xen versions of those routines for foreign pages.
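
For illustration, the wrapper shape described above (reconstructed sketch;
the names follow the diff below):

    static inline void xen_dma_sync_for_cpu(struct device *dev,
            dma_addr_t handle, phys_addr_t paddr, size_t size,
            enum dma_data_direction dir)
    {
        if (pfn_valid(PFN_DOWN(handle)))
            /* local page: plain arch cache maintenance */
            arch_sync_dma_for_cpu(dev, paddr, size, dir);
        else
            /* foreign page: Xen-specific flush */
            __xen_dma_sync_for_cpu(dev, handle, size, dir);
    }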

Signed-off-by: Christoph Hellwig 
---
 arch/arm/xen/mm.c   |  47 ++---
 drivers/xen/swiotlb-xen.c   |  19 ---
 include/xen/page-coherent.h | 100 +++-
 3 files changed, 42 insertions(+), 124 deletions(-)

diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index 85482cdda1e5..0eb88f1355c2 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -86,59 +86,18 @@ static void dma_cache_maint(dma_addr_t handle, size_t size,
} while (left);
 }
 
-static void __xen_dma_page_dev_to_cpu(struct device *hwdev, dma_addr_t handle,
-   size_t size, enum dma_data_direction dir)
+void __xen_dma_sync_for_cpu(struct device *dev, dma_addr_t handle, size_t size,
+   enum dma_data_direction dir)
 {
dma_cache_maint(handle, size, dir, DMA_UNMAP);
 }
 
-static void __xen_dma_page_cpu_to_dev(struct device *hwdev, dma_addr_t handle,
+void __xen_dma_sync_for_device(struct device *dev, dma_addr_t handle,
size_t size, enum dma_data_direction dir)
 {
dma_cache_maint(handle, size, dir, DMA_MAP);
 }
 
-void __xen_dma_map_page(struct device *hwdev, struct page *page,
-dma_addr_t dev_addr, unsigned long offset, size_t size,
-enum dma_data_direction dir, unsigned long attrs)
-{
-   if (dev_is_dma_coherent(hwdev))
-   return;
-   if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
-   return;
-
-   __xen_dma_page_cpu_to_dev(hwdev, dev_addr, size, dir);
-}
-
-void __xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
-   size_t size, enum dma_data_direction dir,
-   unsigned long attrs)
-
-{
-   if (dev_is_dma_coherent(hwdev))
-   return;
-   if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
-   return;
-
-   __xen_dma_page_dev_to_cpu(hwdev, handle, size, dir);
-}
-
-void __xen_dma_sync_single_for_cpu(struct device *hwdev,
-   dma_addr_t handle, size_t size, enum dma_data_direction dir)
-{
-   if (dev_is_dma_coherent(hwdev))
-   return;
-   __xen_dma_page_dev_to_cpu(hwdev, handle, size, dir);
-}
-
-void __xen_dma_sync_single_for_device(struct device *hwdev,
-   dma_addr_t handle, size_t size, enum dma_data_direction dir)
-{
-   if (dev_is_dma_coherent(hwdev))
-   return;
-   __xen_dma_page_cpu_to_dev(hwdev, handle, size, dir);
-}
-
 bool xen_arch_need_swiotlb(struct device *dev,
   phys_addr_t phys,
   dma_addr_t dev_addr)
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 7b23929854e7..c3c383033ae4 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -388,6 +388,7 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
if (map == (phys_addr_t)DMA_MAPPING_ERROR)
return DMA_MAPPING_ERROR;
 
+   phys = map;
dev_addr = xen_phys_to_bus(map);
 
/*
@@ -399,14 +400,9 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
return DMA_MAPPING_ERROR;
}
 
-   page = pfn_to_page(map >> PAGE_SHIFT);
-   offset = map & ~PAGE_MASK;
 done:
-   /*
-* we are not interested in the dma_addr returned by xen_dma_map_page,
-* only in the potential cache flushes executed by the function.
-*/
-   xen_dma_map_page(dev, page, dev_addr, offset, size, dir, attrs);
+   if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+   xen_dma_sync_for_device(dev, dev_addr, phys, size, dir);
return dev_addr;
 }
 
@@ -426,7 +422,8 @@ static void xen_unmap_single(struct device *hwdev, dma_addr_t dev_addr,
 
BUG_ON(dir == DMA_NONE);
 
-   xen_dma_unmap_page(hwdev, dev_addr, size, dir, attrs);
+   if (!dev_is_dma_coherent(hwdev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+   xen_dma_sync_for_cpu(hwdev, dev_addr, paddr, size, dir);
 
/* NOTE: We use dev_addr here, not paddr! */
if (is_xen_swiotlb_buffer(dev_addr))
@@ -446,7 +443,8 @@ xen_swiotlb_sync_single_for_cpu(struct device *dev, dma_addr_t dma_addr,
 {
phys_addr_t paddr = xen_bus_to_phys(dma_addr);
 
-   xen_dma_sync_single_for_cpu(dev, dma_addr, size, dir);
+   if (!dev_is_dma_coherent(dev))
+   xen_dma_sync_for_cpu(dev, dma_addr, paddr, size, dir);
 
if (is_xen_swiotlb_buffer(dma_addr))
swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_CPU);

[PATCH 02/11] xen/arm: use dev_is_dma_coherent

2019-08-16 Thread Christoph Hellwig
Use the dma-noncoherent dev_is_dma_coherent helper instead of the home
grown variant.

Signed-off-by: Christoph Hellwig 
---
 arch/arm/include/asm/dma-mapping.h   |  6 --
 arch/arm/xen/mm.c| 12 ++--
 arch/arm64/include/asm/dma-mapping.h |  9 -
 3 files changed, 6 insertions(+), 21 deletions(-)

diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
index dba9355e2484..bdd80ddbca34 100644
--- a/arch/arm/include/asm/dma-mapping.h
+++ b/arch/arm/include/asm/dma-mapping.h
@@ -91,12 +91,6 @@ static inline dma_addr_t virt_to_dma(struct device *dev, void *addr)
 }
 #endif
 
-/* do not use this function in a driver */
-static inline bool is_device_dma_coherent(struct device *dev)
-{
-   return dev->archdata.dma_coherent;
-}
-
 /**
  * arm_dma_alloc - allocate consistent memory for DMA
  * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index d33b77e9add3..90574d89d0d4 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 #include 
-#include 
+#include 
 #include 
 #include 
 #include 
@@ -99,7 +99,7 @@ void __xen_dma_map_page(struct device *hwdev, struct page *page,
 dma_addr_t dev_addr, unsigned long offset, size_t size,
 enum dma_data_direction dir, unsigned long attrs)
 {
-   if (is_device_dma_coherent(hwdev))
+   if (dev_is_dma_coherent(hwdev))
return;
if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
return;
@@ -112,7 +112,7 @@ void __xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
unsigned long attrs)
 
 {
-   if (is_device_dma_coherent(hwdev))
+   if (dev_is_dma_coherent(hwdev))
return;
if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
return;
@@ -123,7 +123,7 @@ void __xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
 void __xen_dma_sync_single_for_cpu(struct device *hwdev,
dma_addr_t handle, size_t size, enum dma_data_direction dir)
 {
-   if (is_device_dma_coherent(hwdev))
+   if (dev_is_dma_coherent(hwdev))
return;
__xen_dma_page_dev_to_cpu(hwdev, handle, size, dir);
 }
@@ -131,7 +131,7 @@ void __xen_dma_sync_single_for_cpu(struct device *hwdev,
 void __xen_dma_sync_single_for_device(struct device *hwdev,
dma_addr_t handle, size_t size, enum dma_data_direction dir)
 {
-   if (is_device_dma_coherent(hwdev))
+   if (dev_is_dma_coherent(hwdev))
return;
__xen_dma_page_cpu_to_dev(hwdev, handle, size, dir);
 }
@@ -159,7 +159,7 @@ bool xen_arch_need_swiotlb(struct device *dev,
 * memory and we are not able to flush the cache.
 */
return (!hypercall_cflush && (xen_pfn != bfn) &&
-   !is_device_dma_coherent(dev));
+   !dev_is_dma_coherent(dev));
 }
 
 int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h
index bdcb0922a40c..67243255a858 100644
--- a/arch/arm64/include/asm/dma-mapping.h
+++ b/arch/arm64/include/asm/dma-mapping.h
@@ -18,14 +18,5 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
return NULL;
 }
 
-/*
- * Do not use this function in a driver, it is only provided for
- * arch/arm/mm/xen.c, which is used by arm64 as well.
- */
-static inline bool is_device_dma_coherent(struct device *dev)
-{
-   return dev->dma_coherent;
-}
-
 #endif /* __KERNEL__ */
 #endif /* __ASM_DMA_MAPPING_H */
-- 
2.20.1



[PATCH 03/11] xen/arm: pass one less argument to dma_cache_maint

2019-08-16 Thread Christoph Hellwig
Instead of taking apart the dma address in both callers do it inside
dma_cache_maint itself.
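
For illustration, a worked example of the new internal split (assuming
4 KiB kernel pages and 4 KiB Xen pages), matching what the callers used
to compute:

    /* handle = 0x12345678 */
    offset  = handle & ~PAGE_MASK;  /* 0x678 */
    handle &= PAGE_MASK;            /* 0x12345000 */
    xen_pfn = (handle >> XEN_PAGE_SHIFT) + offset / XEN_PAGE_SIZE;
                                    /* 0x12345 + 0 = 0x12345 */
    offset %= XEN_PAGE_SIZE;        /* 0x678 */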

Signed-off-by: Christoph Hellwig 
---
 arch/arm/xen/mm.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index 90574d89d0d4..d9da24fda2f7 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -43,13 +43,15 @@ static bool hypercall_cflush = false;
 
 /* functions called by SWIOTLB */
 
-static void dma_cache_maint(dma_addr_t handle, unsigned long offset,
-   size_t size, enum dma_data_direction dir, enum dma_cache_op op)
+static void dma_cache_maint(dma_addr_t handle, size_t size,
+   enum dma_data_direction dir, enum dma_cache_op op)
 {
struct gnttab_cache_flush cflush;
unsigned long xen_pfn;
+   unsigned long offset = handle & ~PAGE_MASK;
size_t left = size;
 
+   handle &= PAGE_MASK;
xen_pfn = (handle >> XEN_PAGE_SHIFT) + offset / XEN_PAGE_SIZE;
offset %= XEN_PAGE_SIZE;
 
@@ -86,13 +88,13 @@ static void dma_cache_maint(dma_addr_t handle, unsigned long offset,
 static void __xen_dma_page_dev_to_cpu(struct device *hwdev, dma_addr_t handle,
size_t size, enum dma_data_direction dir)
 {
-   dma_cache_maint(handle & PAGE_MASK, handle & ~PAGE_MASK, size, dir, DMA_UNMAP);
+   dma_cache_maint(handle, size, dir, DMA_UNMAP);
 }
 
 static void __xen_dma_page_cpu_to_dev(struct device *hwdev, dma_addr_t handle,
size_t size, enum dma_data_direction dir)
 {
-   dma_cache_maint(handle & PAGE_MASK, handle & ~PAGE_MASK, size, dir, DMA_MAP);
+   dma_cache_maint(handle, size, dir, DMA_MAP);
 }
 
 void __xen_dma_map_page(struct device *hwdev, struct page *page,
-- 
2.20.1



[PATCH 05/11] xen: remove the exports for xen_{create,destroy}_contiguous_region

2019-08-16 Thread Christoph Hellwig
These routines are only used by swiotlb-xen, which cannot be modular.

Signed-off-by: Christoph Hellwig 
---
 arch/arm/xen/mm.c | 2 --
 arch/x86/xen/mmu_pv.c | 2 --
 2 files changed, 4 deletions(-)

diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index 388a45002bad..a59980f1aa54 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -175,13 +175,11 @@ int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
*dma_handle = pstart;
return 0;
 }
-EXPORT_SYMBOL_GPL(xen_create_contiguous_region);
 
 void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
 {
return;
 }
-EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region);
 
 int __init xen_mm_init(void)
 {
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 26e8b326966d..c8dbee62ec2a 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -2625,7 +2625,6 @@ int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
*dma_handle = virt_to_machine(vstart).maddr;
return success ? 0 : -ENOMEM;
 }
-EXPORT_SYMBOL_GPL(xen_create_contiguous_region);
 
 void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
 {
@@ -2660,7 +2659,6 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
 
spin_unlock_irqrestore(&xen_reservation_lock, flags);
 }
-EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region);
 
 static noinline void xen_flush_tlb_all(void)
 {
-- 
2.20.1



swiotlb-xen cleanups

2019-08-16 Thread Christoph Hellwig
Hi Xen maintainers and friends,

please take a look at this series that cleans up the parts of swiotlb-xen
that deal with non-coherent caches.


[PATCH 04/11] xen/arm: remove xen_dma_ops

2019-08-16 Thread Christoph Hellwig
arm and arm64 can just use xen_swiotlb_dma_ops directly like x86, no
need for a pointer indirection.

Signed-off-by: Christoph Hellwig 
---
 arch/arm/mm/dma-mapping.c| 3 ++-
 arch/arm/xen/mm.c| 4 
 arch/arm64/mm/dma-mapping.c  | 3 ++-
 include/xen/arm/hypervisor.h | 2 --
 4 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 738097396445..2661cad36359 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -35,6 +35,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "dma.h"
 #include "mm.h"
@@ -2360,7 +2361,7 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 
 #ifdef CONFIG_XEN
if (xen_initial_domain())
-   dev->dma_ops = xen_dma_ops;
+   dev->dma_ops = &xen_swiotlb_dma_ops;
 #endif
dev->archdata.dma_ops_setup = true;
 }
diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index d9da24fda2f7..388a45002bad 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -183,16 +183,12 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
 }
 EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region);
 
-const struct dma_map_ops *xen_dma_ops;
-EXPORT_SYMBOL(xen_dma_ops);
-
 int __init xen_mm_init(void)
 {
struct gnttab_cache_flush cflush;
if (!xen_initial_domain())
return 0;
xen_swiotlb_init(1, false);
-   xen_dma_ops = &xen_swiotlb_dma_ops;
 
cflush.op = 0;
cflush.a.dev_bus_addr = 0;
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index bd2b039f43a6..4b244a037349 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -8,6 +8,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
@@ -64,6 +65,6 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 
 #ifdef CONFIG_XEN
if (xen_initial_domain())
-   dev->dma_ops = xen_dma_ops;
+   dev->dma_ops = &xen_swiotlb_dma_ops;
 #endif
 }
diff --git a/include/xen/arm/hypervisor.h b/include/xen/arm/hypervisor.h
index 2982571f7cc1..43ef24dd030e 100644
--- a/include/xen/arm/hypervisor.h
+++ b/include/xen/arm/hypervisor.h
@@ -19,8 +19,6 @@ static inline enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
return PARAVIRT_LAZY_NONE;
 }
 
-extern const struct dma_map_ops *xen_dma_ops;
-
 #ifdef CONFIG_XEN
 void __init xen_early_init(void);
 #else
-- 
2.20.1



Re: [PATCH v2 2/2] iommu/arm-smmu-v3: add nr_ats_masters for a quick check

2019-08-16 Thread Leizhen (ThunderTown)



On 2019/8/15 23:23, Will Deacon wrote:
> On Thu, Aug 15, 2019 at 01:44:39PM +0800, Zhen Lei wrote:
>> When (smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS) is true, even if a
>> smmu domain does not contain any ats master, the operations of
>> arm_smmu_atc_inv_to_cmd() and lock protection in arm_smmu_atc_inv_domain()
>> are always executed. This will impact performance, especially in
>> multi-core and stress scenarios. In my FIO test scenario, performance
>> dropped by about 8%.
>>
>> In fact, we can use a struct member to record how many ATS masters the
>> smmu contains, and check that without traversing the list and checking
>> all masters one by one under the lock.
>>
>> Fixes: 9ce27afc0830 ("iommu/arm-smmu-v3: Add support for PCI ATS")
>> Signed-off-by: Zhen Lei 
>> ---
>>  drivers/iommu/arm-smmu-v3.c | 14 +-
>>  1 file changed, 13 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
>> index 29056d9bb12aa01..154334d3310c9b8 100644
>> --- a/drivers/iommu/arm-smmu-v3.c
>> +++ b/drivers/iommu/arm-smmu-v3.c
>> @@ -631,6 +631,7 @@ struct arm_smmu_domain {
>>  
>>  struct io_pgtable_ops   *pgtbl_ops;
>>  boolnon_strict;
>> +int nr_ats_masters;
>>  
>>  enum arm_smmu_domain_stage  stage;
>>  union {
>> @@ -1531,7 +1532,16 @@ static int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
>>  struct arm_smmu_cmdq_ent cmd;
>>  struct arm_smmu_master *master;
>>  
>> -if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
>> +/*
>> + * The protection of spinlock(&iommu_domain->devices_lock) is omitted,
>> + * because for a given master, its map/unmap operations can only
>> + * happen after it has been attached and before it has been detached.
>> + * Hence, if at least one master needs its ATC invalidated, the
>> + * value of smmu_domain->nr_ats_masters cannot be zero.
>> + *
>> + * This can alleviate performance loss in multi-core scenarios.
>> + */
> 
> I find this reasoning pretty dubious, since I think you're assuming that
> an endpoint cannot issue speculative ATS translation requests once its
> ATS capability is enabled. That said, I think it also means we should enable
> ATS in the STE *before* enabling it in the endpoint -- the current logic
> looks like it's the wrong way round to me (including in detach()).
> 
> Anyway, these speculative translations could race with a concurrent unmap()
> call and end up with the ATC containing translations for unmapped pages,
> which I think we should try to avoid.
> 
> Did the RCU approach not work out? You could use an rwlock instead as a
> temporary bodge if the performance doesn't hurt too much.
OK, I will try an rwlock first; it does not change the original code logic.

> 
> Alternatively... maybe we could change the attach flow to do something
> like:
> 
>   enable_ats_in_ste(master);
>   enable_ats_at_pcie_endpoint(master);
>   spin_lock(devices_lock)
>   add_to_device_list(master);
>   nr_ats_masters++;
>   spin_unlock(devices_lock);
>   invalidate_atc(master);
> 
> in which case, the concurrent unmapper will be doing something like:
> 
>   issue_tlbi();
>   smp_mb();
>   if (READ_ONCE(nr_ats_masters)) {
>   ...
>   }
> 
> and I *think* that means that either the unmapper will see the
> nr_ats_masters update and perform the invalidation, or they'll miss
> the update but the attach will invalidate the ATC /after/ the TLBI
> in the command queue.
> 
> Also, John's idea of converting this stuff over to my command batching
> mechanism should help a lot if we can defer this to sync time using the
> gather structure. Maybe an rwlock would be alright for that. Dunno.
> 
> Will
> 
> .
> 

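For reference, the unmap-side check described in the quoted proposal might
look like this (hypothetical sketch; surrounding code simplified):

    /* issue_tlbi() */
    arm_smmu_cmdq_issue_cmd(smmu, &cmd);
    /* order the TLBI against the nr_ats_masters load */
    smp_mb();
    if (READ_ONCE(smmu_domain->nr_ats_masters))
        arm_smmu_atc_inv_domain(smmu_domain, 0, iova, size);
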


Re: [PATCH 00/13] Rework IOMMU API to allow for batching of invalidation

2019-08-16 Thread John Garry

On 15/08/2019 14:55, Will Deacon wrote:

On Thu, Aug 15, 2019 at 12:19:58PM +0100, John Garry wrote:

On 14/08/2019 18:56, Will Deacon wrote:

If you'd like to play with the patches, then I've also pushed them here:

  
https://git.kernel.org/pub/scm/linux/kernel/git/will/linux.git/log/?h=iommu/unmap

but they should behave as a no-op on their own.


As anticipated, my storage testing scenarios roughly give parity throughput
and CPU loading before and after this series.

Patches to convert the Arm SMMUv3 driver to the new API are here:

  
https://git.kernel.org/pub/scm/linux/kernel/git/will/linux.git/log/?h=iommu/cmdq


I quickly tested this again and now I see a performance lift:

                     before (5.3-rc1)   after
D05 8x SAS disks     907K IOPS          970K IOPS
D05 1x NVMe          450K IOPS          466K IOPS
D06 1x NVMe          467K IOPS          466K IOPS

The CPU loading seems to track throughput, so nothing much to say there.

Note: From 5.2 testing, I was seeing >900K IOPS from that NVMe disk for
!IOMMU.


Cheers, John. For interest, how do things look if you pass iommu.strict=0?
That might give some indication about how much the invalidation is still
hurting us.


So I tested for iommu/cmdq for NVMe only, and I see:

           !SMMU       5.3-rc4 strict/!strict   cmdq strict/!strict
D05 NVMe   750K IOPS   456K/540K IOPS           466K/537K IOPS
D06 NVMe   750K IOPS   456K/740K IOPS           466K/745K IOPS

I don't know why the D06 iommu.strict performance is about the same as
D05's, while !strict is so much better. The D06 SMMU implementation is
supposed to be generally much better than that of D05, so I would have
thought that its strict performance would be better as well.





BTW, what were your thoughts on changing
arm_smmu_atc_inv_domain()->arm_smmu_atc_inv_master() to batching? It seems
suitable, but looks untouched. Were you waiting for a resolution to the
performance issue which Leizhen reported?


In principle, I'm supportive of such a change, but I'm not currently able
to test any ATS stuff so somebody else would need to write the patch.
Jean-Philippe is on holiday at the moment, but I'd be happy to review
something from you if you send it out.


Unfortunately I don't have anything ATS-enabled either. Not many do, it 
seems.


Cheers,
John



Will

.






Re: [PATCH 4/6] dma-mapping: remove arch_dma_mmap_pgprot

2019-08-16 Thread Geert Uytterhoeven
On Fri, Aug 16, 2019 at 9:19 AM Christoph Hellwig  wrote:
> arch_dma_mmap_pgprot is used for two things:
>
>  1) to override the "normal" uncached page attributes for mapping
> memory coherent to devices that can't snoop the CPU caches
>  2) to provide the special DMA_ATTR_WRITE_COMBINE semantics on older
> arm systems
>
> Replace (1) with the pgprot_dmacoherent macro that is already provided
> by arm and is much simpler to use, and lift the DMA_ATTR_WRITE_COMBINE
> handling (2) to common code with an explicit arch opt-in.
>
> Signed-off-by: Christoph Hellwig 

>  arch/m68k/Kconfig  |  1 -
>  arch/m68k/include/asm/pgtable_mm.h |  3 +++
>  arch/m68k/kernel/dma.c |  3 +--

Acked-by: Geert Uytterhoeven 

Gr{oetje,eeting}s,

Geert

-- 
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds


Re: [PATCH 6/6] driver core: initialize a default DMA mask for platform device

2019-08-16 Thread Geert Uytterhoeven
Hi Christoph,

On Fri, Aug 16, 2019 at 8:30 AM Christoph Hellwig  wrote:
> We still treat devices without a DMA mask as defaulting to 32 bits for
> both masks, but a few releases ago we started warning about such
> cases, as they require special cases to work around this sloppiness.
> Add a dma_mask field to struct platform_device so that we can initialize
> the dma_mask pointer in struct device and initialize both masks to
> 32 bits by default, replacing similar functionality in m68k and
> powerpc.  The arch_setup_pdev_archdata hook is now unused and removed.
>
> Note that the code looks a little odd with the various conditionals
> because we have to support platform_device structures that are
> statically allocated.
>
> Signed-off-by: Christoph Hellwig 
> ---
>  arch/m68k/kernel/dma.c   |  9 ---

Acked-by: Geert Uytterhoeven 

>  arch/sh/boards/mach-ecovec24/setup.c |  2 --
>  arch/sh/boards/mach-migor/setup.c|  1 -

Acked-by: Geert Uytterhoeven 
given "[PATCH 0/2] Remove calls to empty arch_setup_pdev_archdata()"
https://lore.kernel.org/linux-renesas-soc/1526641611-2769-1-git-send-email-geert+rene...@glider.be/
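
For context, a minimal sketch of the default-mask setup that the quoted
description implies (helper name assumed, not taken from the patch body):

    static void setup_pdev_dma_masks(struct platform_device *pdev)
    {
        if (!pdev->dev.coherent_dma_mask)
            pdev->dev.coherent_dma_mask = DMA_BIT_MASK(32);
        if (!pdev->dma_mask)
            pdev->dma_mask = DMA_BIT_MASK(32);
        if (!pdev->dev.dma_mask)
            pdev->dev.dma_mask = &pdev->dma_mask;
    }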

Gr{oetje,eeting}s,

Geert

-- 
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds


[PATCH 6/6] arm64: document the choice of page attributes for pgprot_dmacoherent

2019-08-16 Thread Christoph Hellwig
Based on an email from Will Deacon.

Signed-off-by: Christoph Hellwig 
---
 arch/arm64/include/asm/pgtable.h | 8 
 1 file changed, 8 insertions(+)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 6700371227d1..6ff221d9a631 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -435,6 +435,14 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd)
__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN)
 #define pgprot_device(prot) \
__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRE) | PTE_PXN | PTE_UXN)
+/*
+ * DMA allocations for non-coherent devices use what the Arm architecture calls
+ * "Normal non-cacheable" memory, which permits speculation, unaligned accesses
+ * and merging of writes.  This is different from "Strongly Ordered" memory
+ * which is intended for MMIO and thus forbids speculation, preserves access
+ * size, requires strict alignment and also forces write responses to come from
+ * the endpoint.
+ */
 #define pgprot_dmacoherent(prot) \
__pgprot_modify(prot, PTE_ATTRINDX_MASK, \
PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN)
-- 
2.20.1



Re: [PATCH v9 08/21] iommu/io-pgtable-arm-v7s: Extend MediaTek 4GB Mode

2019-08-16 Thread Yong Wu
On Thu, 2019-08-15 at 12:50 +0100, Will Deacon wrote:
> Ok, I think speaking to Robin helped me a bit with this...
> 
> On Thu, Aug 15, 2019 at 06:18:38PM +0800, Yong Wu wrote:
> > On Thu, 2019-08-15 at 10:51 +0100, Will Deacon wrote:
> > > On Thu, Aug 15, 2019 at 04:47:49PM +0800, Yong Wu wrote:
> > > > On Wed, 2019-08-14 at 15:41 +0100, Will Deacon wrote:
> > > > > On Sat, Aug 10, 2019 at 03:58:08PM +0800, Yong Wu wrote:
> > > > > > MediaTek extend the arm v7s descriptor to support the dram over 4GB.
> > > > > > 
> > > > > > In the mt2712 and mt8173, it's called "4GB mode": the physical address
> > > > > > is from 0x4000_0000 to 0x1_3fff_ffff, but from the EMI point of view, it
> > > > > > is remapped to the high addresses from 0x1_0000_0000 to 0x1_ffff_ffff,
> > > > > > i.e. bit32 is always enabled. Thus, in the M4U, we always enable bit9
> > > > > > for all PTEs, which means enabling bit32 of the physical address. Here is
> > > > > > the detailed remap relationship in the "4GB mode":
> > > > > > CPU PA          ->   HW PA
> > > > > > 0x4000_0000          0x1_4000_0000 (Add bit32)
> > > > > > 0x8000_0000          0x1_8000_0000 ...
> > > > > > 0xc000_0000          0x1_c000_0000 ...
> > > > > > 0x1_0000_0000        0x1_0000_0000 (No change)
> 
> [...]
> 
> > > > > The way I would like this quirk to work is that the io-pgtable code
> > > > > basically sets bit 9 in the pte when bit 32 is set in the physical 
> > > > > address,
> > > > > and sets bit 4 in the pte when bit 33 is set in the physical address. 
> > > > > It
> > > > > would then do the opposite when converting a pte to a physical 
> > > > > address.
> > > > > 
> > > > > That way, your driver can call the page table code directly with the 
> > > > > high
> > > > > addresses and we don't have to do any manual offsetting or range 
> > > > > checking
> > > > > in the page table code.
> > > > 
> > > > In this case, the mt8183 can work successfully while the "4gb
> > > > mode"(mt8173/mt2712) can not.
> > > > 
> > > > In the "4gb mode", as per the remap relationship above, we should always add
> > > > bit32 in the pte as we did in [2], and we need to add bit32 in
> > > > "iova_to_phys" (not always add it). That means the "4gb mode" has a special
> > > > flow:
> > > > a. Always add bit32 in paddr_to_iopte.
> > > > b. Add bit32 only when PA < 0x4000_0000 in iopte_to_paddr.
> > > 
> > > I think this is probably at the heart of my misunderstanding. What is so
> > > special about PAs (is this HW PA or CPU PA?) below 0x4000_0000? Is this RAM
> > > or something else?
> > 
> > SRAM and HW registers that the IOMMU cannot access.
> 
> Ok, so redrawing your table from above, I think we can say something like:
> 
> 
> CPU Physical address
> 
> 
> 0G1G  2G  3G  4G  5G
> |---A---|---B---|---C---|---D---|---E---|
> +--I/O--+Memory-+
> 
> 
> IOMMU output physical address
> =
> 
>   4G  5G  6G  7G  8G
>   |---E---|---B---|---C---|---D---|
>   +Memory-+
> 
> 
> Do you agree? 

Quite right.


> If so, what happens to region 'A' (the I/O region) in the
> IOMMU output physical address space. Is it accessible?

No. The IOMMU cannot access region 'A' above.

> 
> Anyway, I think it's the job of the driver to convert between the two
> address spaces, so that:
> 
>   - On ->map(), bit 32 of the CPU physical address is set before calling
> into the iopgtable code
> 
>   - The result from ->iova_to_phys() should be the result from the
> iopgtable code, but with the top bit cleared for addresses over
> 5G.
> 
> This assumes that:
> 
>   1. We're ok setting bit 9 in the ptes mapping region 'E'.
>   2. The IOMMU page-table walker uses CPU physical addresses
> 
> Are those true?

Yes. Then this patch would be close to the one[1] I sent in v8.

Do I need to split this patch into two?:
a) the pagetable code that supports 34-bit PAs when the MTK quirk is
enabled. It only has the symmetric code handling BIT32/BIT33. Besides, I
will add CONFIG_PHYS_ADDR_T_64BIT in iopte_to_paddr as commented before.

b) MTK code that applies the special "4gb mode" flow. And the "oas" will
always be 34 bits since v7s already supports our case.

[1]http://lists.infradead.org/pipermail/linux-mediatek/2019-June/020991.html
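
For illustration, a minimal sketch of the symmetric bit handling in (a)
(function names hypothetical; bit 9 of the PTE carries PA bit 32 and
bit 4 carries PA bit 33, as discussed above):

    #define MTK_PTE_PA_BIT32    BIT(9)
    #define MTK_PTE_PA_BIT33    BIT(4)

    static u32 mtk_paddr_to_iopte(phys_addr_t paddr)
    {
        u32 pte = paddr & GENMASK(31, 12);

        if (paddr & BIT_ULL(32))
            pte |= MTK_PTE_PA_BIT32;
        if (paddr & BIT_ULL(33))
            pte |= MTK_PTE_PA_BIT33;
        return pte;
    }

    static phys_addr_t mtk_iopte_to_paddr(u32 pte)
    {
        phys_addr_t paddr = pte & GENMASK(31, 12);

        if (pte & MTK_PTE_PA_BIT32)
            paddr |= BIT_ULL(32);
        if (pte & MTK_PTE_PA_BIT33)
            paddr |= BIT_ULL(33);
        return paddr;
    }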

> 
> Thanks,
> 
> Will


___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 5/6] dma-mapping: make dma_atomic_pool_init self-contained

2019-08-16 Thread Christoph Hellwig
The memory allocated for the atomic pool needs to have the same
mapping attributes that we use for remapping, so use
pgprot_dmacoherent instead of open coding it.  Also deduce a
suitable zone to allocate the memory from based on the presence
of the DMA zones.

Signed-off-by: Christoph Hellwig 
---
 arch/arc/mm/dma.c   |  6 --
 arch/arm64/mm/dma-mapping.c |  6 --
 arch/csky/mm/dma-mapping.c  |  6 --
 arch/nds32/kernel/dma.c |  6 --
 include/linux/dma-mapping.h |  1 -
 kernel/dma/remap.c  | 17 ++---
 6 files changed, 14 insertions(+), 28 deletions(-)

diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c
index 62c210e7ee4c..ff4a5752f8cc 100644
--- a/arch/arc/mm/dma.c
+++ b/arch/arc/mm/dma.c
@@ -104,9 +104,3 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
dev_info(dev, "use %sncoherent DMA ops\n",
 dev->dma_coherent ? "" : "non");
 }
-
-static int __init atomic_pool_init(void)
-{
-   return dma_atomic_pool_init(GFP_KERNEL, pgprot_noncached(PAGE_KERNEL));
-}
-postcore_initcall(atomic_pool_init);
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 676efcda51e6..a1d05f669f67 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -28,12 +28,6 @@ void arch_dma_prep_coherent(struct page *page, size_t size)
__dma_flush_area(page_address(page), size);
 }
 
-static int __init arm64_dma_init(void)
-{
-   return dma_atomic_pool_init(GFP_DMA32, __pgprot(PROT_NORMAL_NC));
-}
-arch_initcall(arm64_dma_init);
-
 #ifdef CONFIG_IOMMU_DMA
 void arch_teardown_dma_ops(struct device *dev)
 {
diff --git a/arch/csky/mm/dma-mapping.c b/arch/csky/mm/dma-mapping.c
index 80783bb71c5c..602a60d47a94 100644
--- a/arch/csky/mm/dma-mapping.c
+++ b/arch/csky/mm/dma-mapping.c
@@ -14,12 +14,6 @@
 #include 
 #include 
 
-static int __init atomic_pool_init(void)
-{
-   return dma_atomic_pool_init(GFP_KERNEL, pgprot_noncached(PAGE_KERNEL));
-}
-postcore_initcall(atomic_pool_init);
-
 void arch_dma_prep_coherent(struct page *page, size_t size)
 {
if (PageHighMem(page)) {
diff --git a/arch/nds32/kernel/dma.c b/arch/nds32/kernel/dma.c
index 490e3720d694..4206d4b6c8ce 100644
--- a/arch/nds32/kernel/dma.c
+++ b/arch/nds32/kernel/dma.c
@@ -80,9 +80,3 @@ void arch_dma_prep_coherent(struct page *page, size_t size)
 {
cache_op(page_to_phys(page), size, cpu_dma_wbinval_range);
 }
-
-static int __init atomic_pool_init(void)
-{
-   return dma_atomic_pool_init(GFP_KERNEL, pgprot_noncached(PAGE_KERNEL));
-}
-postcore_initcall(atomic_pool_init);
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index f7d1eea32c78..48ebe8295987 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -624,7 +624,6 @@ void *dma_common_pages_remap(struct page **pages, size_t size,
const void *caller);
 void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags);
 
-int __init dma_atomic_pool_init(gfp_t gfp, pgprot_t prot);
 bool dma_in_atomic_pool(void *start, size_t size);
 void *dma_alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags);
 bool dma_free_from_pool(void *start, size_t size);
diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
index ffe78f0b2fe4..838123f79639 100644
--- a/kernel/dma/remap.c
+++ b/kernel/dma/remap.c
@@ -105,7 +105,16 @@ static int __init early_coherent_pool(char *p)
 }
 early_param("coherent_pool", early_coherent_pool);
 
-int __init dma_atomic_pool_init(gfp_t gfp, pgprot_t prot)
+static gfp_t dma_atomic_pool_gfp(void)
+{
+   if (IS_ENABLED(CONFIG_ZONE_DMA))
+   return GFP_DMA;
+   if (IS_ENABLED(CONFIG_ZONE_DMA32))
+   return GFP_DMA32;
+   return GFP_KERNEL;
+}
+
+static int __init dma_atomic_pool_init(void)
 {
unsigned int pool_size_order = get_order(atomic_pool_size);
unsigned long nr_pages = atomic_pool_size >> PAGE_SHIFT;
@@ -117,7 +126,7 @@ int __init dma_atomic_pool_init(gfp_t gfp, pgprot_t prot)
page = dma_alloc_from_contiguous(NULL, nr_pages,
 pool_size_order, false);
else
-   page = alloc_pages(gfp, pool_size_order);
+   page = alloc_pages(dma_atomic_pool_gfp(), pool_size_order);
if (!page)
goto out;
 
@@ -128,7 +137,8 @@ int __init dma_atomic_pool_init(gfp_t gfp, pgprot_t prot)
goto free_page;
 
addr = dma_common_contiguous_remap(page, atomic_pool_size, VM_USERMAP,
-  prot, __builtin_return_address(0));
+  pgprot_dmacoherent(PAGE_KERNEL),
+  __builtin_return_address(0));
if (!addr)
goto destroy_genpool;
 
@@ -155,6 +165,7 @@ int __init dma_atomic_pool_init(gfp_t gfp, pgprot_t prot)
atomic_pool_size / 1024);
   

[PATCH 4/6] dma-mapping: remove arch_dma_mmap_pgprot

2019-08-16 Thread Christoph Hellwig
arch_dma_mmap_pgprot is used for two things:

 1) to override the "normal" uncached page attributes for mapping
memory coherent to devices that can't snoop the CPU caches
 2) to provide the special DMA_ATTR_WRITE_COMBINE semantics on older
arm systems

Replace (1) with the pgprot_dmacoherent macro that is already provided
by arm and is much simpler to use, and lift the DMA_ATTR_WRITE_COMBINE
handling (2) to common code with an explicit arch opt-in.
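
For illustration, the resulting common-code helper might look roughly like
this (simplified sketch; see the diff below for the real changes):

    pgprot_t dma_pgprot(struct device *dev, pgprot_t prot, unsigned long attrs)
    {
        if (dev_is_dma_coherent(dev))
            return prot;    /* a cached mapping is fine */
    #ifdef CONFIG_ARCH_HAS_DMA_WRITE_COMBINE
        if (attrs & DMA_ATTR_WRITE_COMBINE)
            return pgprot_writecombine(prot);
    #endif
        return pgprot_dmacoherent(prot);
    }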

Signed-off-by: Christoph Hellwig 
---
 arch/arm/Kconfig   |  1 +
 arch/arm/mm/Kconfig|  1 -
 arch/arm/mm/dma-mapping.c  |  6 --
 arch/arm64/Kconfig |  1 -
 arch/arm64/include/asm/pgtable.h   |  4 
 arch/arm64/mm/dma-mapping.c|  6 --
 arch/m68k/Kconfig  |  1 -
 arch/m68k/include/asm/pgtable_mm.h |  3 +++
 arch/m68k/kernel/dma.c |  3 +--
 include/linux/dma-noncoherent.h| 13 +++--
 kernel/dma/Kconfig | 14 +++---
 kernel/dma/mapping.c   |  8 +---
 12 files changed, 36 insertions(+), 25 deletions(-)

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 33b00579beff..e172fba1e8fd 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -7,6 +7,7 @@ config ARM
select ARCH_HAS_BINFMT_FLAT
select ARCH_HAS_DEBUG_VIRTUAL if MMU
select ARCH_HAS_DEVMEM_IS_ALLOWED
+   select ARCH_HAS_DMA_WRITE_COMBINE if !ARM_DMA_MEM_BUFFERABLE
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_KEEPINITRD
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index c54cd7ed90ba..0609c9e2191b 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -665,7 +665,6 @@ config ARM_LPAE
select PHYS_ADDR_T_64BIT
select SWIOTLB
select ARCH_HAS_DMA_COHERENT_TO_PFN
-   select ARCH_HAS_DMA_MMAP_PGPROT
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
select ARCH_HAS_SYNC_DMA_FOR_CPU
help
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index d42557ee69c2..d27b12f61737 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -2402,12 +2402,6 @@ long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr,
return dma_to_pfn(dev, dma_addr);
 }
 
-pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
-   unsigned long attrs)
-{
-   return __get_dma_pgprot(attrs, prot);
-}
-
 void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
gfp_t gfp, unsigned long attrs)
 {
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 3adcec05b1f6..dab9dda34206 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -13,7 +13,6 @@ config ARM64
select ARCH_HAS_DEBUG_VIRTUAL
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_DMA_COHERENT_TO_PFN
-   select ARCH_HAS_DMA_MMAP_PGPROT
select ARCH_HAS_DMA_PREP_COHERENT
select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
select ARCH_HAS_ELF_RANDOMIZE
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index e09760ece844..6700371227d1 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -435,6 +435,10 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd)
__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN)
 #define pgprot_device(prot) \
__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRE) | PTE_PXN | PTE_UXN)
+#define pgprot_dmacoherent(prot) \
+   __pgprot_modify(prot, PTE_ATTRINDX_MASK, \
+   PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN)
+
 #define __HAVE_PHYS_MEM_ACCESS_PROT
 struct file;
 extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index bd2b039f43a6..676efcda51e6 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -11,12 +11,6 @@
 
 #include 
 
-pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
-   unsigned long attrs)
-{
-   return pgprot_writecombine(prot);
-}
-
 void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
size_t size, enum dma_data_direction dir)
 {
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index c518d695c376..a9e564306d3e 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -4,7 +4,6 @@ config M68K
default y
select ARCH_32BIT_OFF_T
select ARCH_HAS_BINFMT_FLAT
-   select ARCH_HAS_DMA_MMAP_PGPROT if MMU && !COLDFIRE
select ARCH_HAS_DMA_PREP_COHERENT if HAS_DMA && MMU && !COLDFIRE
select ARCH_HAS_SYNC_DMA_FOR_DEVICE if HAS_DMA
select ARCH_MIGHT_HAVE_PC_PARPORT if ISA
diff --git a/arch/m68k/include/asm/pgtable_mm.h b/arch/m68k/include/asm/pgtable_mm.h
index fe3ddd73a0cc..fde4534b974f 100644
--- a/arch/m68k/include/asm/pgtable_mm.h

[PATCH 3/6] arm-nommu: remove the unused pgprot_dmacoherent define

2019-08-16 Thread Christoph Hellwig
Signed-off-by: Christoph Hellwig 
---
 arch/arm/include/asm/pgtable-nommu.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/arm/include/asm/pgtable-nommu.h b/arch/arm/include/asm/pgtable-nommu.h
index 0b1f6799a32e..d0de24f06724 100644
--- a/arch/arm/include/asm/pgtable-nommu.h
+++ b/arch/arm/include/asm/pgtable-nommu.h
@@ -62,7 +62,6 @@ typedef pte_t *pte_addr_t;
  */
 #define pgprot_noncached(prot) (prot)
 #define pgprot_writecombine(prot) (prot)
-#define pgprot_dmacoherent(prot) (prot)
 #define pgprot_device(prot)(prot)
 
 
-- 
2.20.1



[PATCH 2/6] unicore32: remove the unused pgprot_dmacoherent define

2019-08-16 Thread Christoph Hellwig
Signed-off-by: Christoph Hellwig 
---
 arch/unicore32/include/asm/pgtable.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/unicore32/include/asm/pgtable.h b/arch/unicore32/include/asm/pgtable.h
index 9492aa304f03..126e961a8cb0 100644
--- a/arch/unicore32/include/asm/pgtable.h
+++ b/arch/unicore32/include/asm/pgtable.h
@@ -198,8 +198,6 @@ static inline pte_t pte_mkspecial(pte_t pte) { return pte; }
__pgprot(pgprot_val(prot) & ~PTE_CACHEABLE)
 #define pgprot_writecombine(prot)  \
__pgprot(pgprot_val(prot) & ~PTE_CACHEABLE)
-#define pgprot_dmacoherent(prot)   \
-   __pgprot(pgprot_val(prot) & ~PTE_CACHEABLE)
 
 #define pmd_none(pmd)  (!pmd_val(pmd))
 #define pmd_present(pmd)   (pmd_val(pmd) & PMD_PRESENT)
-- 
2.20.1



[PATCH 1/6] MIPS: remove support for DMA_ATTR_WRITE_COMBINE

2019-08-16 Thread Christoph Hellwig
MIPS uses the KSEG1 kernel memory segment to map dma coherent
allocations for non-coherent devices as uncacheable, and does not have
any kind of special support for DMA_ATTR_WRITE_COMBINE in the allocation
path.  Thus supporting DMA_ATTR_WRITE_COMBINE in dma_mmap_attrs will
lead to multiple mappings with different caching attributes.
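
For illustration (assuming a 32-bit MIPS kernel with KSEG0/KSEG1): the
uncached "mapping" is just an address alias, so there is no per-mapping
place to hang a write-combine attribute:

    void *cached   = kaddr;                                    /* KSEG0, cached */
    void *uncached = (void *)CKSEG1ADDR(virt_to_phys(kaddr)); /* KSEG1, uncached */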

Fixes: 8c172467be36 ("MIPS: Add implementation of dma_map_ops.mmap()")
Signed-off-by: Christoph Hellwig 
---
 arch/mips/Kconfig  | 1 -
 arch/mips/mm/dma-noncoherent.c | 8 
 2 files changed, 9 deletions(-)

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index d50fafd7bf3a..86e6760ef0d0 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1119,7 +1119,6 @@ config DMA_PERDEV_COHERENT
 
 config DMA_NONCOHERENT
bool
-   select ARCH_HAS_DMA_MMAP_PGPROT
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
select ARCH_HAS_UNCACHED_SEGMENT
select NEED_DMA_MAP_STATE
diff --git a/arch/mips/mm/dma-noncoherent.c b/arch/mips/mm/dma-noncoherent.c
index ed56c6fa7be2..1d4d57dd9acf 100644
--- a/arch/mips/mm/dma-noncoherent.c
+++ b/arch/mips/mm/dma-noncoherent.c
@@ -65,14 +65,6 @@ long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr,
return page_to_pfn(virt_to_page(cached_kernel_address(cpu_addr)));
 }
 
-pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
-   unsigned long attrs)
-{
-   if (attrs & DMA_ATTR_WRITE_COMBINE)
-   return pgprot_writecombine(prot);
-   return pgprot_noncached(prot);
-}
-
 static inline void dma_sync_virt(void *addr, size_t size,
enum dma_data_direction dir)
 {
-- 
2.20.1



cleanup the dma_pgprot handling

2019-08-16 Thread Christoph Hellwig
Hi all,

this series replaces the arch_dma_mmap_pgprot hooks with the
simpler pgprot_dmacoherent, as already used by the arm code, and
cleans up various bits around that area.

I'd still like to hear confirmation from the mips folks on how
the write combine attribute can or can't work with the KSEG1
uncached segment.