Re: [PATCH v3 2/6] iommu/dma: add support for non-strict mode

2018-07-25 Thread Leizhen (ThunderTown)



On 2018/7/25 6:01, Robin Murphy wrote:
> On 2018-07-12 7:18 AM, Zhen Lei wrote:
>> 1. Save the related domain pointer in struct iommu_dma_cookie, make iovad
>> capable call domain->ops->flush_iotlb_all to flush TLB.
>> 2. Add a new iommu capability: IOMMU_CAP_NON_STRICT, which used to indicate
>> that the iommu domain support non-strict mode.
>> 3. During the iommu domain initialization phase, call capable() to check
>> whether it supports non-strict mode. If so, call init_iova_flush_queue
>> to register iovad->flush_cb callback.
>> 4. All unmap(contains iova-free) APIs will finally invoke __iommu_dma_unmap
>> -->iommu_dma_free_iova. If the domain is non-strict, call queue_iova to
>> put off iova freeing.
>>
>> Signed-off-by: Zhen Lei 
>> ---
>>   drivers/iommu/dma-iommu.c | 25 +
>>   include/linux/iommu.h |  7 +++
>>   2 files changed, 32 insertions(+)
>>
>> diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
>> index ddcbbdb..9f0c77a 100644
>> --- a/drivers/iommu/dma-iommu.c
>> +++ b/drivers/iommu/dma-iommu.c
>> @@ -55,6 +55,7 @@ struct iommu_dma_cookie {
>>   };
>>   struct list_headmsi_page_list;
>>   spinlock_tmsi_lock;
>> +struct iommu_domain*domain_non_strict;
>>   };
>> static inline size_t cookie_msi_granule(struct iommu_dma_cookie *cookie)
>> @@ -257,6 +258,17 @@ static int iova_reserve_iommu_regions(struct device 
>> *dev,
>>   return ret;
>>   }
>>   +static void iommu_dma_flush_iotlb_all(struct iova_domain *iovad)
>> +{
>> +struct iommu_dma_cookie *cookie;
>> +struct iommu_domain *domain;
>> +
>> +cookie = container_of(iovad, struct iommu_dma_cookie, iovad);
>> +domain = cookie->domain_non_strict;
>> +
>> +domain->ops->flush_iotlb_all(domain);
>> +}
>> +
>>   /**
>>* iommu_dma_init_domain - Initialise a DMA mapping domain
>>* @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
>> @@ -272,6 +284,7 @@ static int iova_reserve_iommu_regions(struct device *dev,
>>   int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
>>   u64 size, struct device *dev)
>>   {
>> +const struct iommu_ops *ops = domain->ops;
>>   struct iommu_dma_cookie *cookie = domain->iova_cookie;
>>   struct iova_domain *iovad = &cookie->iovad;
>>   unsigned long order, base_pfn, end_pfn;
>> @@ -308,6 +321,15 @@ int iommu_dma_init_domain(struct iommu_domain *domain, 
>> dma_addr_t base,
>>   }
>> init_iova_domain(iovad, 1UL << order, base_pfn);
>> +
>> +if ((ops->capable && ops->capable(IOMMU_CAP_NON_STRICT)) &&
>> +(IOMMU_DOMAIN_STRICT_MODE(domain) == IOMMU_NON_STRICT)) {
>> +BUG_ON(!ops->flush_iotlb_all);
>> +
>> +cookie->domain_non_strict = domain;
>> +init_iova_flush_queue(iovad, iommu_dma_flush_iotlb_all, NULL);
>> +}
>> +
>>   if (!dev)
>>   return 0;
>>   @@ -390,6 +412,9 @@ static void iommu_dma_free_iova(struct 
>> iommu_dma_cookie *cookie,
>>   /* The MSI case is only ever cleaning up its most recent allocation */
>>   if (cookie->type == IOMMU_DMA_MSI_COOKIE)
>>   cookie->msi_iova -= size;
>> +else if (cookie->domain_non_strict)
>> +queue_iova(iovad, iova_pfn(iovad, iova),
>> +size >> iova_shift(iovad), 0);
>>   else
>>   free_iova_fast(iovad, iova_pfn(iovad, iova),
>>   size >> iova_shift(iovad));
>> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
>> index 19938ee..82ed979 100644
>> --- a/include/linux/iommu.h
>> +++ b/include/linux/iommu.h
>> @@ -86,6 +86,12 @@ struct iommu_domain_geometry {
>>   #define IOMMU_DOMAIN_DMA(__IOMMU_DOMAIN_PAGING |\
>>__IOMMU_DOMAIN_DMA_API)
>>   +#define IOMMU_STRICT0
>> +#define IOMMU_NON_STRICT1
>> +#define IOMMU_STRICT_MODE_MASK1UL
>> +#define IOMMU_DOMAIN_STRICT_MODE(domain)\
>> +(domain->type != IOMMU_DOMAIN_UNMANAGED)
>> +
>>   struct iommu_domain {
>>   unsigned type;
>>   const struct iommu_ops *ops;
>> @@ -101,6 +107,7 @@ enum iommu_cap {
>>  transactions */
>>   IOMMU_CAP_INTR_REMAP,/* IOMMU supports interrupt isolation */
>>   IOMMU_CAP_NOEXEC,/* IOMMU_NOEXEC flag */
>> +IOMMU_CAP_NON_STRICT,/* IOMMU supports non-strict mode */
> 
> This still isn't a capability of the hardware. Non-strict mode is pure 
> software policy; *every IOMMU ever* supports it.
> 
> If you want to have this kind of per-driver control, make it a domain 
> attribute - that's not only more logical, but should also work out a lot 
> cleaner overall.

OK, I will use quirk, so this capability will be removed in the next version.

> 
> Robin.
> 
>>   };
>> /*
>>
> 
> .
> 

-- 
Thanks!
Best Regards

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [PATCH v3 0/6] add non-strict mode support for arm-smmu-v3

2018-07-25 Thread Leizhen (ThunderTown)


On 2018/7/25 5:51, Robin Murphy wrote:
> On 2018-07-12 7:18 AM, Zhen Lei wrote:
>> v2 -> v3:
>> Add a bootup option "iommu_strict_mode" to make the manager can choose which
>> mode to be used. The first 5 patches have not changed.
>> +iommu_strict_mode=[arm-smmu-v3]
>> +0 - strict mode (default)
>> +1 - non-strict mode
>>
>> v1 -> v2:
>> Use the lowest bit of the io_pgtable_ops.unmap's iova parameter to pass the 
>> strict mode:
>> 0, IOMMU_STRICT;
>> 1, IOMMU_NON_STRICT;
>> Treat 0 as IOMMU_STRICT, so that the unmap operation can compatible with
>> other IOMMUs which still use strict mode. In other words, this patch series
>> will not impact other IOMMU drivers. I tried add a new quirk 
>> IO_PGTABLE_QUIRK_NON_STRICT
>> in io_pgtable_cfg.quirks, but it can not pass the strict mode of the domain 
>> from SMMUv3
>> driver to io-pgtable module.
> 
> What exactly is the issue there? We don't have any problem with other quirks 
> like NO_DMA, and as I said before, by the time we're allocating the 
> io-pgtable in arm_smmu_domain_finalise() we already know everything there is 
> to know about the domain.

Because userspace can map/unmap and start devices to access memory through VFIO.
So that, the attacker can:
1. alloc memory
2. map
3. unmap
4. free memory
5. repeatedly accessing the just-freed memory based on the just-unmapped iova;
   this attack may succeed if the freed memory is reused by others and the
mapping is still present in the TLB

But if only root user can use VFIO, this is an unnecessary worry. Then both 
normal and VFIO will use the
same strict mode, so that the new quirk IO_PGTABLE_QUIRK_NON_STRICT can easily 
be applied.

> 
>> Add a new member domain_non_strict in struct iommu_dma_cookie, this member 
>> will only be
>> initialized when the related domain and IOMMU driver support non-strict mode.
>>
>> v1:
>> In common, a IOMMU unmap operation follow the below steps:
>> 1. remove the mapping in page table of the specified iova range
>> 2. execute tlbi command to invalid the mapping which is cached in TLB
>> 3. wait for the above tlbi operation to be finished
>> 4. free the IOVA resource
>> 5. free the physical memory resource
>>
>> This maybe a problem when unmap is very frequently, the combination of tlbi
>> and wait operation will consume a lot of time. A feasible method is put off
>> tlbi and iova-free operation, when accumulating to a certain number or
>> reaching a specified time, execute only one tlbi_all command to clean up
>> TLB, then free the backup IOVAs. Mark as non-strict mode.
>>
>> But it must be noted that, although the mapping has already been removed in
>> the page table, it maybe still exist in TLB. And the freed physical memory
>> may also be reused for others. So an attacker can persistently access memory
>> based on the just-freed IOVA, to obtain sensitive data or corrupt memory. So
>> the VFIO should always choose the strict mode.
>>
>> Some may consider put off physical memory free also, that will still follow
>> strict mode. But for the map_sg cases, the memory allocation is not 
>> controlled
>> by IOMMU APIs, so it is not enforceable.
>>
>> Fortunately, Intel and AMD have already applied the non-strict mode, and put
>> queue_iova() operation into the common file dma-iommu.c., and my work is 
>> based
>> on it. The difference is that arm-smmu-v3 driver will call IOMMU common APIs 
>> to
>> unmap, but Intel and AMD IOMMU drivers are not.
>>
>> Below is the performance data of strict vs non-strict for NVMe device:
>> Randomly Read  IOPS: 146K(strict) vs 573K(non-strict)
>> Randomly Write IOPS: 143K(strict) vs 513K(non-strict)
> 
> How does that compare to passthrough performance? One thing I'm not entirely 
> clear about is what the realistic use-case for this is - even if invalidation 
> were infinitely fast, enabling translation still typically has a fair impact 
> on overall system performance in terms of latency, power, memory bandwidth, 
> etc., so I can't help wonder what devices exist today for which performance 
> is critical and robustness* is unimportant, yet have crippled addressing 
> capabilities such that they can't just use passthrough.
I have no passthrough performance data yet, I will ask my team to do it. But we 
have tested the Global bypass:
Randomly Read IOPS: 744K, and Randomly Write IOPS: is the same to non-strict.

I'm also not clear. But I think in most cases, the system does not need to run
at full capacity, but the system
should have that ability. For example, a system's daily load may be only 30-50%,
but the load may increase to 80%+
on a festival day.

Passthrough is not enough to support VFIO, and virtualization needs the latter.

> 
> Robin.
> 
> 
> * I don't want to say "security" here, since I'm actually a lot less 
> concerned about the theoretical malicious endpoint/wild write scenarios than 
> the the much more straightforward malfunctioning device and/or buggy driver 
> causing use-after-free style memory corruption. A

RE: [RFC PATCH 03/10] iommu/vt-d: Allocate groups for mediated devices

2018-07-25 Thread Tian, Kevin
> From: Tian, Kevin
> Sent: Thursday, July 26, 2018 11:04 AM
[...]
> >
> > The IOMMU operations we care about don't take a device handle, I think,
> > just a domain. And VFIO itself only deals with domains when doing
> > map/unmap. Maybe we could add this operation to the IOMMU
> subsystem:
> >
> > child_domain = domain_create_child(parent_dev, parent_domain)
> >
> > A child domain would be a smaller isolation granule, getting a PASID if
> > that's what the IOMMU implements or another mechanism for 2). It is
> > automatically attached to its parent's devices, so attach/detach
> > operations wouldn't be necessary on the child.
> >
> > Depending on the underlying architecture the child domain can support
> > map/unmap on a single stage, or map/unmap for 2nd level and
> > bind_pgtable
> > for 1st level.
> >
> > I'm not sure how this works for host SVA though. I think the
> > sva_bind_dev() API could stay the same, but the internals will need
> > to change.
> >
> 
> hierarchical domain might be the right way to go, but let's do more
> thinking on any corner cases.
> 

btw maybe we don't need make it 'hierarchical', as maintaining
hierarchy usually brings more work. What we require is possibly
just the capability of having one device bind to multiple 
iommu_domains. One domain is reserved for parent driver's
own DMA activities (e.g. serving DMA APIs), while other domains
are auxiliary and can be tagged with a PASID (or any other identifier
which IOMMU can use to support multiple domains). Such identifiers 
may be automatically provisioned when auxiliary domain is attached, 
i.e. not requiring an explicit request from caller. IMO it's safe to 
assume that supporting multiple iommu domains anyway implies 
some finer-grained capability than RID-based in underlying IOMMU.
Then there is no need of parent/child concept.

thoughts?

Thanks
Kevin
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


RE: [RFC PATCH 03/10] iommu/vt-d: Allocate groups for mediated devices

2018-07-25 Thread Tian, Kevin
> From: Jean-Philippe Brucker [mailto:jean-philippe.bruc...@arm.com]
> Sent: Thursday, July 26, 2018 3:20 AM
> 
> On 25/07/18 07:19, Tian, Kevin wrote:
> >> From: Tian, Kevin
> >> Sent: Wednesday, July 25, 2018 10:16 AM
> >>
> > [...]
> >>>
> >>> There is another way: as we're planning to add a generic pasid_alloc()
> >>> function to the IOMMU API, the mdev module itself could allocate a
> >>> default PASID for each mdev by calling pasid_alloc() on the mdev's
> >>> parent, and then do map()/unmap() with that PASID. This way we don't
> >>> have to add IOMMU ops to the mdev bus, everything can still be done
> >>> using the ops of the parent. And IOMMU drivers "only" have to
> >> implement
> >>> PASID ops, which will be reused by drivers other than mdev.
> >>
> >> yes, PASID is more general abstraction than mdev. However there is
> >> one problem. Please note with new scalable mode now PASID is not
> >> used just for SVA. You can do 1st-level, 2nd-level and nested in PASID
> >> granularity, meaning each PASID can be bound to an unique iommu
> >> domain. However today IOMMU driver allows only one iommu_domain
> >> per device. If we simply install allocated PASID to parent device, we
> >> should also remove that iommu_domain limitation. Is it the way that
> >> we want to pursue?
> >>
> >> mdev avoids such problem as it introduces a separate device...
> >
> > another thing to think about is to simplify the caller logic. It would
> > be good to have consistent IOMMU APIs cross PCI device and
> > mdev, for common VFIO operations e.g. map/unmap, iommu_
> > attach_group, etc. If we need special version ops with PASID, it
> > brings burden to VFIO and other IOMMU clients...
> 
> Yes, providing special ops is the simplest to implement (or least
> invasive, I guess) but isn't particularly elegant. See the patch from
> Jordan below, that I was planning to add to the SVA series (I'm
> laboriously working towards next version) and my email from last week:
> https://patchwork.kernel.org/patch/10412251/
> 
> Whenever I come back to hierarchical IOMMU domains I reject it as too
> complicated, but maybe that is what we need. I find it difficult to
> reason about because domains currently represent both a collection of
> devices and a one or more address spaces. I proposed the io_mm thing to
> represent a single address space, and to avoid adding special cases to
> every function in the IOMMU subsystem that manipulates domains.

I suppose io_mm is still under iommu_domain, right? this is different
from hierarchical iommu domain concept...

> 
> > For IOMMU-capable mdev, there are two use cases which have
> > been talked so far:
> >
> > 1) mdev with PASID-granular DMA isolation
> > 2) mdev inheriting RID from parent device (no sharing)
> 
> Would that be a single mdev per parent device? Otherwise I don't really
> understand how it would work without all map/unmap operations going to
> the parent device's driver.

yes, that is why I said "no sharing". In that special case, host driver 
itself doesn't do DMA. Only the mdev does. 

> 
> > 1) is what this RFC is currently for. 2) is what Alex concerned
> > which is not covered in RFC yet.
> >
> > In my mind, an ideal design is like below:
> >
> > a) VFIO provides an interface for parent driver to specify
> > whether a mdev is IOMMU capable, with flag to indicate
> > whether it's PASID-granular or RID-inherit;
> >
> > b) Once an IOMMU-capable mdev is created, VFIO treats it no
> > different from normal physical devices, using exactly same
> > IOMMU APIs to operate (including future SVA). VFIO doesn't
> > care whether PASID or RID will be used in hardware;
> >
> > c) IOMMU driver then handle RID/PASID difference internally,
> > based on its own configuration and mdev type.
> >
> > To support above goal, I feel a new device handle is a nice fit
> > to represent mdev within IOMMU driver, with same set of
> > capabilities as observed on its parent device.
> 
> It's a good abstraction, but I'm still concerned about other users of
> PASID-granular DMA isolation, for example GPU drivers wanting to improve
> isolation of DMA bufs, will want the same functionality without going
> through the vfio-mdev module.

for GPU I think you meant SVA. that one anyway needs its own 
interface as what we have been discussing in yours and Jacob's
series.

Here mdev is orthogonal to a specific capability like SVA. It is 
sort of logical representation of subset resource of parent device,
on top of which we can enable IOMMU capabilities including SVA.

I'm not sure whether it is good to combine two requirements closely...

> 
> The IOMMU operations we care about don't take a device handle, I think,
> just a domain. And VFIO itself only deals with domains when doing
> map/unmap. Maybe we could add this operation to the IOMMU subsystem:
> 
> child_domain = domain_create_child(parent_dev, parent_domain)
> 
> A child domain would be a smaller isolation granule, getting a PASID if
> that's what the IOMMU impleme

Re: [PATCH] iommu/ipmmu-vmsa: Clarify supported platforms

2018-07-25 Thread Laurent Pinchart
Hi Geert,

Thank you for the patch.

On Wednesday, 25 July 2018 16:10:29 EEST Geert Uytterhoeven wrote:
> The Renesas IPMMU-VMSA driver supports not just R-Car H2 and M2 SoCs,
> but also other R-Car Gen2 and R-Car Gen3 SoCs.
> 
> Drop a superfluous "Renesas" while at it.
> 
> Signed-off-by: Geert Uytterhoeven 

Reviewed-by: Laurent Pinchart 

> ---
>  drivers/iommu/Kconfig | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
> index 568ae81b0e99b67b..c69dc0b29b5df37f 100644
> --- a/drivers/iommu/Kconfig
> +++ b/drivers/iommu/Kconfig
> @@ -306,8 +306,8 @@ config IPMMU_VMSA
>   select IOMMU_IO_PGTABLE_LPAE
>   select ARM_DMA_USE_IOMMU
>   help
> -   Support for the Renesas VMSA-compatible IPMMU Renesas found in the
> -   R-Mobile APE6 and R-Car H2/M2 SoCs.
> +   Support for the Renesas VMSA-compatible IPMMU found in the R-Mobile
> +   APE6, R-Car Gen2, and R-Car Gen3 SoCs.
> 
> If unsure, say N.


-- 
Regards,

Laurent Pinchart



___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [RFC PATCH 03/10] iommu/vt-d: Allocate groups for mediated devices

2018-07-25 Thread Jean-Philippe Brucker
On 25/07/18 07:19, Tian, Kevin wrote:
>> From: Tian, Kevin
>> Sent: Wednesday, July 25, 2018 10:16 AM
>>
> [...]
>>>
>>> There is another way: as we're planning to add a generic pasid_alloc()
>>> function to the IOMMU API, the mdev module itself could allocate a
>>> default PASID for each mdev by calling pasid_alloc() on the mdev's
>>> parent, and then do map()/unmap() with that PASID. This way we don't
>>> have to add IOMMU ops to the mdev bus, everything can still be done
>>> using the ops of the parent. And IOMMU drivers "only" have to
>> implement
>>> PASID ops, which will be reused by drivers other than mdev.
>>
>> yes, PASID is more general abstraction than mdev. However there is
>> one problem. Please note with new scalable mode now PASID is not
>> used just for SVA. You can do 1st-level, 2nd-level and nested in PASID
>> granularity, meaning each PASID can be bound to an unique iommu
>> domain. However today IOMMU driver allows only one iommu_domain
>> per device. If we simply install allocated PASID to parent device, we
>> should also remove that iommu_domain limitation. Is it the way that
>> we want to pursue?
>>
>> mdev avoids such problem as it introduces a separate device...
> 
> another thing to think about is to simplify the caller logic. It would
> be good to have consistent IOMMU APIs cross PCI device and 
> mdev, for common VFIO operations e.g. map/unmap, iommu_
> attach_group, etc. If we need special version ops with PASID, it 
> brings burden to VFIO and other IOMMU clients...

Yes, providing special ops is the simplest to implement (or least
invasive, I guess) but isn't particularly elegant. See the patch from
Jordan below, that I was planning to add to the SVA series (I'm
laboriously working towards next version) and my email from last week:
https://patchwork.kernel.org/patch/10412251/

Whenever I come back to hierarchical IOMMU domains I reject it as too
complicated, but maybe that is what we need. I find it difficult to
reason about because domains currently represent both a collection of
devices and a one or more address spaces. I proposed the io_mm thing to
represent a single address space, and to avoid adding special cases to
every function in the IOMMU subsystem that manipulates domains.

> For IOMMU-capable mdev, there are two use cases which have 
> been talked so far:
> 
> 1) mdev with PASID-granular DMA isolation
> 2) mdev inheriting RID from parent device (no sharing)

Would that be a single mdev per parent device? Otherwise I don't really
understand how it would work without all map/unmap operations going to
the parent device's driver.

> 1) is what this RFC is currently for. 2) is what Alex concerned 
> which is not covered in RFC yet.
> 
> In my mind, an ideal design is like below:
> 
> a) VFIO provides an interface for parent driver to specify 
> whether a mdev is IOMMU capable, with flag to indicate 
> whether it's PASID-granular or RID-inherit;
> 
> b) Once an IOMMU-capable mdev is created, VFIO treats it no
> different from normal physical devices, using exactly same 
> IOMMU APIs to operate (including future SVA). VFIO doesn't 
> care whether PASID or RID will be used in hardware;
> 
> c) IOMMU driver then handle RID/PASID difference internally, 
> based on its own configuration and mdev type.
> 
> To support above goal, I feel a new device handle is a nice fit 
> to represent mdev within IOMMU driver, with same set of 
> capabilities as observed on its parent device.

It's a good abstraction, but I'm still concerned about other users of
PASID-granular DMA isolation, for example GPU drivers wanting to improve
isolation of DMA bufs, will want the same functionality without going
through the vfio-mdev module.

The IOMMU operations we care about don't take a device handle, I think,
just a domain. And VFIO itself only deals with domains when doing
map/unmap. Maybe we could add this operation to the IOMMU subsystem:

child_domain = domain_create_child(parent_dev, parent_domain)

A child domain would be a smaller isolation granule, getting a PASID if
that's what the IOMMU implements or another mechanism for 2). It is
automatically attached to its parent's devices, so attach/detach
operations wouldn't be necessary on the child.

Depending on the underlying architecture the child domain can support
map/unmap on a single stage, or map/unmap for 2nd level and bind_pgtable
for 1st level.

I'm not sure how this works for host SVA though. I think the
sva_bind_dev() API could stay the same, but the internals will need
to change.

Thanks,
Jean

-
I was going to send the following patch for dealing with private PASIDs.
I used it for a vfio-mdev prototype internally: VFIO would call
iommu_sva_alloc_pasid on the parent device and then sva_map/sva_unmap on
the parent domain + allocated io_mm (which redirects to
iommu_map/iommu_unmap when there is no io_mm). Since it relies on io_mm
instead of child domains, it duplicates the iommu ops, and as discussed
might not be adapted to Vt-d 

[PATCH 2/2] iommu/arm-smmu: Add support for Stratix10 smmu-v2 variant

2018-07-25 Thread thor . thayer
From: Thor Thayer 

Add the clocks required for the Stratix10 SMMU. Define the
clock names in the SMMU name array and add DT compatible
matching element.

Signed-off-by: Thor Thayer 
---
This patch is dependent on the patch series
"iommu/arm-smmu: Add runtime pm/sleep support"
(https://patchwork.ozlabs.org/cover/946160/)
---
 drivers/iommu/arm-smmu.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 7c69736a30f8..d46216b34a77 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -1983,6 +1983,17 @@ static const struct arm_smmu_match_data qcom_smmuv2 = {
.num_clks = ARRAY_SIZE(qcom_smmuv2_clks),
 };
 
+static const char * const altr_smmuv2_clks[] = {
+   "masters"
+};
+
+static const struct arm_smmu_match_data altr_smmuv2 = {
+   .version = ARM_SMMU_V2,
+   .model = ARM_MMU500,
+   .clks = altr_smmuv2_clks,
+   .num_clks = ARRAY_SIZE(altr_smmuv2_clks),
+};
+
 static const struct of_device_id arm_smmu_of_match[] = {
{ .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
{ .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
@@ -1991,6 +2002,7 @@ static const struct of_device_id arm_smmu_of_match[] = {
{ .compatible = "arm,mmu-500", .data = &arm_mmu500 },
{ .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
{ .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 },
+   { .compatible = "altr,smmu-v2", .data = &altr_smmuv2 },
{ },
 };
 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 0/2] iommu/arm-smmu: Add Stratix10 SMMU Support

2018-07-25 Thread thor . thayer
From: Thor Thayer 

Add SMMU support for the Stratix10 SOCFPGA. This patch requires
clock enables for the master TBUs and therefore has a dependency
on patches currently being reviewed.

This patchset is dependent on the patch series
"iommu/arm-smmu: Add runtime pm/sleep support"
(https://patchwork.ozlabs.org/cover/946160/)

Thor Thayer (2):
  arm64: dts: stratix10: Add Stratix10 SMMU support
  iommu/arm-smmu: Add support for Stratix10 smmu-v2 variant

 .../devicetree/bindings/iommu/arm,smmu.txt | 25 ++
 arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi  | 30 ++
 drivers/iommu/arm-smmu.c   | 12 +
 3 files changed, 67 insertions(+)

-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 1/2] arm64: dts: stratix10: Add Stratix10 SMMU support

2018-07-25 Thread thor . thayer
From: Thor Thayer 

Add SMMU support to the Stratix10 Device Tree which
includes adding the SMMU node and adding IOMMU stream
ids to the SMMU peripherals. Update bindings.

Signed-off-by: Thor Thayer 
---
This patch is dependent on the patch series
"iommu/arm-smmu: Add runtime pm/sleep support"
(https://patchwork.ozlabs.org/cover/946160/)
---
 .../devicetree/bindings/iommu/arm,smmu.txt | 25 ++
 arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi  | 30 ++
 2 files changed, 55 insertions(+)

diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.txt 
b/Documentation/devicetree/bindings/iommu/arm,smmu.txt
index 7c71a6ed465a..8e3fe0594e3e 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu.txt
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.txt
@@ -18,6 +18,7 @@ conditions.
 "arm,mmu-500"
 "cavium,smmu-v2"
 "qcom,-smmu-v2", "qcom,smmu-v2"
+"altr,smmu-v2"
 
   depending on the particular implementation and/or the
   version of the architecture implemented.
@@ -179,3 +180,27 @@ conditions.
 <&mmcc SMMU_MDP_AHB_CLK>;
clock-names = "bus", "iface";
};
+
+   /* Stratix10 arm,smmu-v2 implementation */
+   smmu5: iommu@fa00 {
+   compatible = "altr,smmu-v2", "arm,mmu-500",
+"arm,smmu-v2";
+   reg = <0xfa00 0x4>;
+   #global-interrupts = <2>;
+   #iommu-cells = <1>;
+   clocks = <&clkmgr STRATIX10_L4_MAIN_CLK>;
+   clock-names = "masters";
+   interrupt-parent = <&intc>;
+   interrupts = <0 128 4>, /* Global Secure Fault */
+   <0 129 4>, /* Global Non-secure Fault */
+   /* Non-secure Context Interrupts (32) */
+   <0 138 4>, <0 139 4>, <0 140 4>, <0 141 4>,
+   <0 142 4>, <0 143 4>, <0 144 4>, <0 145 4>,
+   <0 146 4>, <0 147 4>, <0 148 4>, <0 149 4>,
+   <0 150 4>, <0 151 4>, <0 152 4>, <0 153 4>,
+   <0 154 4>, <0 155 4>, <0 156 4>, <0 157 4>,
+   <0 158 4>, <0 159 4>, <0 160 4>, <0 161 4>,
+   <0 162 4>, <0 163 4>, <0 164 4>, <0 165 4>,
+   <0 166 4>, <0 167 4>, <0 168 4>, <0 169 4>;
+   stream-match-mask = <0x7ff0>;
+   };
diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi 
b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
index d033da401c26..e38ca86d48f6 100644
--- a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
+++ b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
@@ -137,6 +137,7 @@
reset-names = "stmmaceth", "stmmaceth-ocp";
clocks = <&clkmgr STRATIX10_EMAC0_CLK>;
clock-names = "stmmaceth";
+   iommus = <&smmu 1>;
status = "disabled";
};
 
@@ -150,6 +151,7 @@
reset-names = "stmmaceth", "stmmaceth-ocp";
clocks = <&clkmgr STRATIX10_EMAC1_CLK>;
clock-names = "stmmaceth";
+   iommus = <&smmu 2>;
status = "disabled";
};
 
@@ -163,6 +165,7 @@
reset-names = "stmmaceth", "stmmaceth-ocp";
clocks = <&clkmgr STRATIX10_EMAC2_CLK>;
clock-names = "stmmaceth";
+   iommus = <&smmu 3>;
status = "disabled";
};
 
@@ -273,6 +276,7 @@
clocks = <&clkmgr STRATIX10_L4_MP_CLK>,
 <&clkmgr STRATIX10_SDMMC_CLK>;
clock-names = "biu", "ciu";
+   iommus = <&smmu 5>;
status = "disabled";
};
 
@@ -307,6 +311,30 @@
altr,modrst-offset = <0x20>;
};
 
+   smmu: iommu@fa00 {
+   compatible = "altr,smmu-v2", "arm,mmu-500",
+"arm,smmu-v2";
+   reg = <0xfa00 0x4>;
+   #global-interrupts = <2>;
+   #iommu-cells = <1>;
+   clocks = <&clkmgr STRATIX10_L4_MAIN_CLK>;
+   clock-names = "masters";
+   interrupt-parent = <&intc>;
+   interrupts = <0 128 4>, /* Global Secure Fault */
+   <0 129 4>, /* Global Non-secure Fault */
+   /* Non-secure Context Interrupts (32) */
+   <0 138 4>, <0 139 4>, <0 140 4>, <0 141 4>,
+   <0 142 4>, <0 143 4>, <0 144 4>, <0

Re: [PATCH v13 1/4] iommu/arm-smmu: Add pm_runtime/sleep ops

2018-07-25 Thread Vivek Gautam
On Tue, Jul 24, 2018 at 8:51 PM, Robin Murphy  wrote:
> On 19/07/18 11:15, Vivek Gautam wrote:
>>
>> From: Sricharan R 
>>
>> The smmu needs to be functional only when the respective
>> master's using it are active. The device_link feature
>> helps to track such functional dependencies, so that the
>> iommu gets powered when the master device enables itself
>> using pm_runtime. So by adapting the smmu driver for
>> runtime pm, above said dependency can be addressed.
>>
>> This patch adds the pm runtime/sleep callbacks to the
>> driver and also the functions to parse the smmu clocks
>> from DT and enable them in resume/suspend.
>>
>> Also, while we enable the runtime pm add a pm sleep suspend
>> callback that pushes devices to low power state by turning
>> the clocks off in a system sleep.
>> Also add corresponding clock enable path in resume callback.
>>
>> Signed-off-by: Sricharan R 
>> Signed-off-by: Archit Taneja 
>> [vivek: rework for clock and pm ops]
>> Signed-off-by: Vivek Gautam 
>> Reviewed-by: Tomasz Figa 
>> ---
>>
>> Changes since v12:
>>   - Added pm sleep .suspend callback. This disables the clocks.
>>   - Added corresponding change to enable clocks in .resume
>>pm sleep callback.
>>
>>   drivers/iommu/arm-smmu.c | 75
>> ++--
>>   1 file changed, 73 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
>> index c73cfce1ccc0..9138a6fffe04 100644
>> --- a/drivers/iommu/arm-smmu.c
>> +++ b/drivers/iommu/arm-smmu.c
>> @@ -48,6 +48,7 @@
>>   #include 
>>   #include 
>>   #include 
>> +#include 
>>   #include 
>>   #include 
>>   @@ -205,6 +206,8 @@ struct arm_smmu_device {
>> u32 num_global_irqs;
>> u32 num_context_irqs;
>> unsigned int*irqs;
>> +   struct clk_bulk_data*clks;
>> +   int num_clks;
>> u32 cavium_id_base; /* Specific to
>> Cavium */
>>   @@ -1897,10 +1900,12 @@ static int arm_smmu_device_cfg_probe(struct
>> arm_smmu_device *smmu)
>>   struct arm_smmu_match_data {
>> enum arm_smmu_arch_version version;
>> enum arm_smmu_implementation model;
>> +   const char * const *clks;
>> +   int num_clks;
>>   };
>> #define ARM_SMMU_MATCH_DATA(name, ver, imp) \
>> -static struct arm_smmu_match_data name = { .version = ver, .model = imp }
>> +static const struct arm_smmu_match_data name = { .version = ver, .model =
>> imp }
>> ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
>>   ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
>> @@ -1919,6 +1924,23 @@ static const struct of_device_id
>> arm_smmu_of_match[] = {
>>   };
>>   MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
>>   +static void arm_smmu_fill_clk_data(struct arm_smmu_device *smmu,
>> +  const char * const *clks)
>> +{
>> +   int i;
>> +
>> +   if (smmu->num_clks < 1)
>> +   return;
>> +
>> +   smmu->clks = devm_kcalloc(smmu->dev, smmu->num_clks,
>> + sizeof(*smmu->clks), GFP_KERNEL);
>> +   if (!smmu->clks)
>> +   return;
>> +
>> +   for (i = 0; i < smmu->num_clks; i++)
>> +   smmu->clks[i].id = clks[i];
>> +}
>> +
>>   #ifdef CONFIG_ACPI
>>   static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
>>   {
>> @@ -2001,6 +2023,9 @@ static int arm_smmu_device_dt_probe(struct
>> platform_device *pdev,
>> data = of_device_get_match_data(dev);
>> smmu->version = data->version;
>> smmu->model = data->model;
>> +   smmu->num_clks = data->num_clks;
>> +
>> +   arm_smmu_fill_clk_data(smmu, data->clks);
>> parse_driver_options(smmu);
>>   @@ -2099,6 +2124,14 @@ static int arm_smmu_device_probe(struct
>> platform_device *pdev)
>> smmu->irqs[i] = irq;
>> }
>>   + err = devm_clk_bulk_get(smmu->dev, smmu->num_clks, smmu->clks);
>> +   if (err)
>> +   return err;
>> +
>> +   err = clk_bulk_prepare(smmu->num_clks, smmu->clks);
>> +   if (err)
>> +   return err;
>> +
>> err = arm_smmu_device_cfg_probe(smmu);
>> if (err)
>> return err;
>> @@ -2181,6 +2214,9 @@ static int arm_smmu_device_remove(struct
>> platform_device *pdev)
>> /* Turn the thing off */
>> writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
>> +
>> +   clk_bulk_unprepare(smmu->num_clks, smmu->clks);
>> +
>> return 0;
>>   }
>>   @@ -2189,15 +2225,50 @@ static void arm_smmu_device_shutdown(struct
>> platform_device *pdev)
>> arm_smmu_device_remove(pdev);
>>   }
>>   +static int __maybe_unused arm_smmu_runtime_resume(struct device *dev)
>> +{
>> +   struct arm_smmu_device *smmu = dev_get_drvdata(dev);
>> +
>> +   return clk_bulk_enable

Re: [PATCH v2 6/7] ACPI/IORT: Don't set default coherent DMA mask

2018-07-25 Thread Robin Murphy

On 2018-07-25 4:27 PM, Lorenzo Pieralisi wrote:

On Mon, Jul 23, 2018 at 11:16:11PM +0100, Robin Murphy wrote:

Now that we can track upstream DMA constraints properly with
bus_dma_mask instead of trying (and failing) to maintain it in
coherent_dma_mask, it doesn't make much sense for the firmware code to
be touching the latter at all. It's merely papering over bugs wherein a
driver has failed to call dma_set_coherent_mask() *and* the bus code has
not initialised any default value.


Nit: I do not think the driver had a chance to probe and therefore call
dma_set_coherent_mask() before iort_dma_setup() is executed, right ?


Indeed, dma_configure() should be invoked shortly before the driver's 
.probe() routine, so at this point the struct device has only been 
touched by the bus code which created it. Anyone managing to call it on 
a device which already has a driver bound is doing something horribly wrong.



Anyway, the patch makes perfect sense:

Acked-by: Lorenzo Pieralisi 


Thanks,
Robin.


We don't really want to encourage more drivers coercing dma_mask so
we'll continue to fix that up if necessary, but add a warning to help
flush out any such buggy bus code that remains.

CC: Lorenzo Pieralisi 
CC: Hanjun Guo 
CC: Sudeep Holla 
Signed-off-by: Robin Murphy 
---
  drivers/acpi/arm64/iort.c | 22 +++---
  1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index bc51cff5505e..08f26db2da7e 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -978,20 +978,20 @@ void iort_dma_setup(struct device *dev, u64 *dma_addr, 
u64 *dma_size)
int ret, msb;
  
  	/*

-* Set default coherent_dma_mask to 32 bit.  Drivers are expected to
-* setup the correct supported mask.
+* If @dev is expected to be DMA-capable then the bus code that created
+* it should have initialised its dma_mask pointer by this point. For
+* now, we'll continue the legacy behaviour of coercing it to the
+* coherent mask if not, but we'll no longer do so quietly.
 */
-   if (!dev->coherent_dma_mask)
-   dev->coherent_dma_mask = DMA_BIT_MASK(32);
-
-   /*
-* Set it to coherent_dma_mask by default if the architecture
-* code has not set it.
-*/
-   if (!dev->dma_mask)
+   if (!dev->dma_mask) {
+   dev_warn(dev, "DMA mask not set\n");
dev->dma_mask = &dev->coherent_dma_mask;
+   }
  
-	size = max(dev->coherent_dma_mask, dev->coherent_dma_mask + 1);

+   if (dev->coherent_dma_mask)
+   size = max(dev->coherent_dma_mask, dev->coherent_dma_mask + 1);
+   else
+   size = 1ULL << 32;
  
  	if (dev_is_pci(dev)) {

ret = acpi_dma_get_range(dev, &dmaaddr, &offset, &size);
--
2.17.1.dirty


___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 6/7] ACPI/IORT: Don't set default coherent DMA mask

2018-07-25 Thread Lorenzo Pieralisi
On Mon, Jul 23, 2018 at 11:16:11PM +0100, Robin Murphy wrote:
> Now that we can track upstream DMA constraints properly with
> bus_dma_mask instead of trying (and failing) to maintain it in
> coherent_dma_mask, it doesn't make much sense for the firmware code to
> be touching the latter at all. It's merely papering over bugs wherein a
> driver has failed to call dma_set_coherent_mask() *and* the bus code has
> not initialised any default value.

Nit: I do not think the driver had a chance to probe and therefore call
dma_set_coherent_mask() before iort_dma_setup() is executed, right ?

Anyway, the patch makes perfect sense:

Acked-by: Lorenzo Pieralisi 

> We don't really want to encourage more drivers coercing dma_mask so
> we'll continue to fix that up if necessary, but add a warning to help
> flush out any such buggy bus code that remains.
> 
> CC: Lorenzo Pieralisi 
> CC: Hanjun Guo 
> CC: Sudeep Holla 
> Signed-off-by: Robin Murphy 
> ---
>  drivers/acpi/arm64/iort.c | 22 +++---
>  1 file changed, 11 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
> index bc51cff5505e..08f26db2da7e 100644
> --- a/drivers/acpi/arm64/iort.c
> +++ b/drivers/acpi/arm64/iort.c
> @@ -978,20 +978,20 @@ void iort_dma_setup(struct device *dev, u64 *dma_addr, 
> u64 *dma_size)
>   int ret, msb;
>  
>   /*
> -  * Set default coherent_dma_mask to 32 bit.  Drivers are expected to
> -  * setup the correct supported mask.
> +  * If @dev is expected to be DMA-capable then the bus code that created
> +  * it should have initialised its dma_mask pointer by this point. For
> +  * now, we'll continue the legacy behaviour of coercing it to the
> +  * coherent mask if not, but we'll no longer do so quietly.
>*/
> - if (!dev->coherent_dma_mask)
> - dev->coherent_dma_mask = DMA_BIT_MASK(32);
> -
> - /*
> -  * Set it to coherent_dma_mask by default if the architecture
> -  * code has not set it.
> -  */
> - if (!dev->dma_mask)
> + if (!dev->dma_mask) {
> + dev_warn(dev, "DMA mask not set\n");
>   dev->dma_mask = &dev->coherent_dma_mask;
> + }
>  
> - size = max(dev->coherent_dma_mask, dev->coherent_dma_mask + 1);
> + if (dev->coherent_dma_mask)
> + size = max(dev->coherent_dma_mask, dev->coherent_dma_mask + 1);
> + else
> + size = 1ULL << 32;
>  
>   if (dev_is_pci(dev)) {
>   ret = acpi_dma_get_range(dev, &dmaaddr, &offset, &size);
> -- 
> 2.17.1.dirty
> 
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] iommu/iova: Update cached node pointer when current node fails to get any free IOVA

2018-07-25 Thread Robin Murphy

On 12/07/18 08:45, Ganapatrao Kulkarni wrote:

Hi Robin,


On Mon, Jun 4, 2018 at 9:36 AM, Ganapatrao Kulkarni  wrote:

ping??

On Mon, May 21, 2018 at 6:45 AM, Ganapatrao Kulkarni  wrote:

On Thu, Apr 26, 2018 at 3:15 PM, Ganapatrao Kulkarni  wrote:

Hi Robin,

On Mon, Apr 23, 2018 at 11:11 PM, Ganapatrao Kulkarni
 wrote:

On Mon, Apr 23, 2018 at 10:07 PM, Robin Murphy  wrote:

On 19/04/18 18:12, Ganapatrao Kulkarni wrote:


The performance drop is observed with long hours iperf testing using 40G
cards. This is mainly due to long iterations in finding the free iova
range in 32bit address space.

In current implementation for 64bit PCI devices, there is always first
attempt to allocate iova from the 32-bit (SAC preferred over DAC) address
range. Once we run out of the 32-bit range, allocation falls back to the higher
range; however, due to the cached32_node optimization this is not supposed to be
painful. cached32_node always points to the most recently allocated 32-bit node.
When address range is full, it will be pointing to last allocated node
(leaf node), so walking rbtree to find the available range is not
expensive affair. However this optimization does not behave well when
one of the middle node is freed. In that case cached32_node is updated
to point to next iova range. The next iova allocation will consume free
range and again update cached32_node to itself. From now on, walking
over 32-bit range is more expensive.

This patch adds a fix to update the cached node to the leaf node when there is
no free iova range left, which avoids unnecessarily long iterations.



The only trouble with this is that "allocation failed" doesn't uniquely mean
"space full". Say that after some time the 32-bit space ends up empty except
for one page at 0x1000 and one at 0x8000, then somebody tries to
allocate 2GB. If we move the cached node down to the leftmost entry when
that fails, all subsequent allocation attempts are now going to fail despite
the space being 99.9999% free!

I can see a couple of ways to solve that general problem of free space above
the cached node getting lost, but neither of them helps with the case where
there is genuinely insufficient space (and if anything would make it even
slower). In terms of the optimisation you want here, i.e. fail fast when an
allocation cannot possibly succeed, the only reliable idea which comes to
mind is free-PFN accounting. I might give that a go myself to see how ugly
it looks.


Did you get a chance to look into this issue?
I am waiting for your suggestion/patch for this issue!


I got as far as [1], but I wasn't sure how much I liked it, since it 
still seems a little invasive for such a specific case (plus I can't 
remember if it's actually been debugged or not). I think in the end I 
started wondering whether it's even worth bothering with the 32-bit 
optimisation for PCIe devices - 4 extra bytes worth of TLP is surely a 
lot less significant than every transaction taking up to 50% more bus 
cycles was for legacy PCI.


Robin.

[1] 
http://www.linux-arm.org/git?p=linux-rm.git;a=commitdiff;h=a8e0e4af10ebebb3669750e05bf0028e5bd6afe8

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 0/7] Stop losing firmware-set DMA masks

2018-07-25 Thread Ard Biesheuvel
On 25 July 2018 at 13:31, Christoph Hellwig  wrote:
> Hi Robin,
>
> this series looks fine to me.  Do you want me to pull this into the
> dma-mapping tree?  I'd like to see a few more ACKs from the ACPI/OF/
> IOMMU maintainers though.
>

This fixes the issue I reported with the on-SoC NIC of the Socionext
SynQuacer, so assuming it works as advertised:

Acked-by: Ard Biesheuvel 

for the series.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH/RFC] drivers/vfio: Allow type-1 IOMMU instantiation with Renesas IPMMU-VMSA

2018-07-25 Thread Robin Murphy

Hi Geert,

On 25/07/18 14:34, Geert Uytterhoeven wrote:

The Renesas IPMMU-VMSA driver is compatible with the notion of a type-1
IOMMU in VFIO.

This patch allows guests to use the VFIO_IOMMU_TYPE1 API on hosts
equipped with a Renesas VMSA-compatible IPMMU.

Signed-off-by: Geert Uytterhoeven 
---
Lightly tested with sata_rcar on Renesas R-Car H3 ES2.0.

For testing, this patch and all prerequisites are available in the
topic/rcar3-virt-gpio-passthrough-v3 branch of my git repository at
git://git.kernel.org/pub/scm/linux/kernel/git/geert/renesas-drivers.git.
---
  drivers/vfio/Kconfig | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
index c84333eb5eb59bef..a3e21e7c066596d7 100644
--- a/drivers/vfio/Kconfig
+++ b/drivers/vfio/Kconfig
@@ -21,7 +21,8 @@ config VFIO_VIRQFD
  menuconfig VFIO
tristate "VFIO Non-Privileged userspace driver framework"
depends on IOMMU_API
-   select VFIO_IOMMU_TYPE1 if (X86 || S390 || ARM_SMMU || ARM_SMMU_V3)
+   select VFIO_IOMMU_TYPE1 if (X86 || S390 || ARM_SMMU || ARM_SMMU_V3 || \
+   IPMMU_VMSA)


I recall this came up in the context of virtio-iommu too[1], wherein we 
decided that listing individual drivers was actually a bit silly and 
this should simply have ARM || ARM64 for consistency. Looks like there's 
even more justification now :)


Robin.

[1] https://www.mail-archive.com/kvmarm@lists.cs.columbia.edu/msg16235.html


select ANON_INODES
help
  VFIO provides a framework for secure userspace device drivers.


___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH/RFC] drivers/vfio: Allow type-1 IOMMU instantiation with Renesas IPMMU-VMSA

2018-07-25 Thread Geert Uytterhoeven
The Renesas IPMMU-VMSA driver is compatible with the notion of a type-1
IOMMU in VFIO.

This patch allows guests to use the VFIO_IOMMU_TYPE1 API on hosts
equipped with a Renesas VMSA-compatible IPMMU.

Signed-off-by: Geert Uytterhoeven 
---
Lightly tested with sata_rcar on Renesas R-Car H3 ES2.0.

For testing, this patch and all prerequisites are available in the
topic/rcar3-virt-gpio-passthrough-v3 branch of my git repository at
git://git.kernel.org/pub/scm/linux/kernel/git/geert/renesas-drivers.git.
---
 drivers/vfio/Kconfig | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
index c84333eb5eb59bef..a3e21e7c066596d7 100644
--- a/drivers/vfio/Kconfig
+++ b/drivers/vfio/Kconfig
@@ -21,7 +21,8 @@ config VFIO_VIRQFD
 menuconfig VFIO
tristate "VFIO Non-Privileged userspace driver framework"
depends on IOMMU_API
-   select VFIO_IOMMU_TYPE1 if (X86 || S390 || ARM_SMMU || ARM_SMMU_V3)
+   select VFIO_IOMMU_TYPE1 if (X86 || S390 || ARM_SMMU || ARM_SMMU_V3 || \
+   IPMMU_VMSA)
select ANON_INODES
help
  VFIO provides a framework for secure userspace device drivers.
-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 3/3] arm64: dts: stratix10: Add SMMU Node

2018-07-25 Thread Robin Murphy

On 16/07/18 19:56, Thor Thayer wrote:
[...]

@@ -332,6 +342,36 @@
  altr,modrst-offset = <0x20>;
  };
+    smmu: iommu@fa00 {
+    compatible = "arm,mmu-500", "arm,smmu-v2";
+    reg = <0xfa00 0x4>;
+    #global-interrupts = <9>;
+    #iommu-cells = <1>;
+    clocks = <&clkmgr STRATIX10_L4_MAIN_CLK>;
+    clock-names = "smmu_clk";
+    interrupt-parent = <&intc>;
+    interrupts = <0 128 4>,    /* Global Secure Fault */
+    <0 129 4>, /* Global Non-secure Fault */
+    <0 130 4>, /* FPGA Performance Counter */
+    <0 131 4>, /* DMA Performance Counter */
+    <0 132 4>, /* EMAC Performance Counter */
+    <0 133 4>, /* IO Performance Counter */
+    <0 134 4>, /* SDM Performance Counter */


Note that there isn't much benefit to adding the secure or PMU 
interrupts here other than to document the hardware - FWIW I have 
actually been working on a PMU driver, and needless to say it turns 
out not to be sufficient just having those munged into the SMMU global 
fault handler.


Thanks for pointing this out. I was following other SMMU-500 device 
trees. Just to clarify, how should I simplify this? Should I replace all 
the above with the following?


 <0 129 4>, /* Global Non-secure Fault */

Or will your upcoming PMU driver need the PMU units? It sounded like 
using the just Global fault was not sufficient.


No, you had it right - what I meant was it's only worth including the 
actual global fault signals themselves here (there's no harm in leaving 
the secure one in if you like, it just won't do anything if the GIC is 
configured correctly). What I found was that the current SMMU binding 
leaves no reasonable way to encode the PMU interrupts in this interrupt 
list that doesn't break at least one of the possible old/new driver/DT 
combinations, so whatever happens they will need to be specified 
separately once a PMU binding is defined.



+    <0 136 4>, /* Non-secure Combined Interrupt */
+    <0 137 4>, /* Secure Combined Interrupt */


Similarly the combined interrupt; that's literally just all these 
other interrupt lines ORed together at the SMMU end, and would 
generally only be useful if you *didn't* have the individual lines 
wired up. As it stands with everything listed, any event will also 
generate a spurious global fault IRQ, which isn't ideal (not that you 
should get many interrupts during normal operation, but still...)



And I'd remove both of these then, right?


Indeed.

Thanks for the review and helpful comments (and also pointing out the 
existing clock patch in my patch series summary)!


Cheers,
Robin.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [PATCH v2 3/7] ACPI/IORT: Set bus DMA mask as appropriate

2018-07-25 Thread Lorenzo Pieralisi
On Mon, Jul 23, 2018 at 11:16:08PM +0100, Robin Murphy wrote:
> When an explicit DMA limit is described by firmware, we need to remember
> it regardless of how drivers might subsequently update their devices'
> masks. The new bus_dma_mask field does that.
> 
> CC: Lorenzo Pieralisi 
> CC: Hanjun Guo 
> CC: Sudeep Holla 
> Signed-off-by: Robin Murphy 
> ---
>  drivers/acpi/arm64/iort.c | 1 +
>  1 file changed, 1 insertion(+)

Acked-by: Lorenzo Pieralisi 

> diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
> index 4a66896e2aa3..bc51cff5505e 100644
> --- a/drivers/acpi/arm64/iort.c
> +++ b/drivers/acpi/arm64/iort.c
> @@ -1014,6 +1014,7 @@ void iort_dma_setup(struct device *dev, u64 *dma_addr, 
> u64 *dma_size)
>* Limit coherent and dma mask based on size
>* retrieved from firmware.
>*/
> + dev->bus_dma_mask = mask;
>   dev->coherent_dma_mask = mask;
>   *dev->dma_mask = mask;
>   }
> -- 
> 2.17.1.dirty
> 
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v2] iommu/ipmmu-vmsa: Fix allocation in atomic context

2018-07-25 Thread Geert Uytterhoeven
When attaching a device to an IOMMU group with
CONFIG_DEBUG_ATOMIC_SLEEP=y:

BUG: sleeping function called from invalid context at mm/slab.h:421
in_atomic(): 1, irqs_disabled(): 128, pid: 61, name: kworker/1:1
...
Call trace:
 ...
 arm_lpae_alloc_pgtable+0x114/0x184
 arm_64_lpae_alloc_pgtable_s1+0x2c/0x128
 arm_32_lpae_alloc_pgtable_s1+0x40/0x6c
 alloc_io_pgtable_ops+0x60/0x88
 ipmmu_attach_device+0x140/0x334

ipmmu_attach_device() takes a spinlock, while arm_lpae_alloc_pgtable()
allocates memory using GFP_KERNEL.  Originally, the ipmmu-vmsa driver
had its own custom page table allocation implementation using
GFP_ATOMIC, hence the spinlock was fine.

Fix this by replacing the spinlock by a mutex, like the arm-smmu driver
does.

Fixes: f20ed39f53145e45 ("iommu/ipmmu-vmsa: Use the ARM LPAE page table 
allocator")
Signed-off-by: Geert Uytterhoeven 
Reviewed-by: Laurent Pinchart 
---
v2:
  - Add Reviewed-by.
---
 drivers/iommu/ipmmu-vmsa.c | 9 -
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index 40ae6e87cb880223..ef566b4989d6e2f8 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -73,7 +73,7 @@ struct ipmmu_vmsa_domain {
struct io_pgtable_ops *iop;
 
unsigned int context_id;
-   spinlock_t lock;/* Protects mappings */
+   struct mutex mutex; /* Protects mappings */
 };
 
 static struct ipmmu_vmsa_domain *to_vmsa_domain(struct iommu_domain *dom)
@@ -595,7 +595,7 @@ static struct iommu_domain *__ipmmu_domain_alloc(unsigned 
type)
if (!domain)
return NULL;
 
-   spin_lock_init(&domain->lock);
+   mutex_init(&domain->mutex);
 
return &domain->io_domain;
 }
@@ -641,7 +641,6 @@ static int ipmmu_attach_device(struct iommu_domain 
*io_domain,
struct iommu_fwspec *fwspec = dev->iommu_fwspec;
struct ipmmu_vmsa_device *mmu = to_ipmmu(dev);
struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain);
-   unsigned long flags;
unsigned int i;
int ret = 0;
 
@@ -650,7 +649,7 @@ static int ipmmu_attach_device(struct iommu_domain 
*io_domain,
return -ENXIO;
}
 
-   spin_lock_irqsave(&domain->lock, flags);
+   mutex_lock(&domain->mutex);
 
if (!domain->mmu) {
/* The domain hasn't been used yet, initialize it. */
@@ -674,7 +673,7 @@ static int ipmmu_attach_device(struct iommu_domain 
*io_domain,
} else
dev_info(dev, "Reusing IPMMU context %u\n", domain->context_id);
 
-   spin_unlock_irqrestore(&domain->lock, flags);
+   mutex_unlock(&domain->mutex);
 
if (ret < 0)
return ret;
-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH] iommu/ipmmu-vmsa: Clarify supported platforms

2018-07-25 Thread Geert Uytterhoeven
The Renesas IPMMU-VMSA driver supports not just R-Car H2 and M2 SoCs,
but also other R-Car Gen2 and R-Car Gen3 SoCs.

Drop a superfluous "Renesas" while at it.

Signed-off-by: Geert Uytterhoeven 
---
 drivers/iommu/Kconfig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 568ae81b0e99b67b..c69dc0b29b5df37f 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -306,8 +306,8 @@ config IPMMU_VMSA
select IOMMU_IO_PGTABLE_LPAE
select ARM_DMA_USE_IOMMU
help
- Support for the Renesas VMSA-compatible IPMMU Renesas found in the
- R-Mobile APE6 and R-Car H2/M2 SoCs.
+ Support for the Renesas VMSA-compatible IPMMU found in the R-Mobile
+ APE6, R-Car Gen2, and R-Car Gen3 SoCs.
 
  If unsure, say N.
 
-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 1/7 v6] Documentation: fsl-mc: add iommu-map device-tree binding for fsl-mc bus

2018-07-25 Thread Robin Murphy

On 09/07/18 12:18, Nipun Gupta wrote:

The existing IOMMU bindings cannot be used to specify the relationship
between fsl-mc devices and IOMMUs. This patch adds a generic binding for
mapping fsl-mc devices to IOMMUs, using iommu-map property.


No more nits from me :)

Acked-by: Robin Murphy 


Signed-off-by: Nipun Gupta 
Reviewed-by: Rob Herring 
---
  .../devicetree/bindings/misc/fsl,qoriq-mc.txt  | 39 ++
  1 file changed, 39 insertions(+)

diff --git a/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt 
b/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt
index 6611a7c..01fdc33 100644
--- a/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt
+++ b/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt
@@ -9,6 +9,25 @@ blocks that can be used to create functional hardware 
objects/devices
  such as network interfaces, crypto accelerator instances, L2 switches,
  etc.
  
+For an overview of the DPAA2 architecture and fsl-mc bus see:

+Documentation/networking/dpaa2/overview.rst
+
+As described in the above overview, all DPAA2 objects in a DPRC share the
+same hardware "isolation context" and a 10-bit value called an ICID
+(isolation context id) is expressed by the hardware to identify
+the requester.
+
+The generic 'iommus' property is insufficient to describe the relationship
+between ICIDs and IOMMUs, so an iommu-map property is used to define
+the set of possible ICIDs under a root DPRC and how they map to
+an IOMMU.
+
+For generic IOMMU bindings, see
+Documentation/devicetree/bindings/iommu/iommu.txt.
+
+For arm-smmu binding, see:
+Documentation/devicetree/bindings/iommu/arm,smmu.txt.
+
  Required properties:
  
  - compatible

@@ -88,14 +107,34 @@ Sub-nodes:
Value type: 
Definition: Specifies the phandle to the PHY device node 
associated
with the this dpmac.
+Optional properties:
+
+- iommu-map: Maps an ICID to an IOMMU and associated iommu-specifier
+  data.
+
+  The property is an arbitrary number of tuples of
+  (icid-base,iommu,iommu-base,length).
+
+  Any ICID i in the interval [icid-base, icid-base + length) is
+  associated with the listed IOMMU, with the iommu-specifier
+  (i - icid-base + iommu-base).
  
  Example:
  
+smmu: iommu@500 {

+   compatible = "arm,mmu-500";
+   #iommu-cells = <1>;
+   stream-match-mask = <0x7C00>;
+   ...
+};
+
  fsl_mc: fsl-mc@80c00 {
  compatible = "fsl,qoriq-mc";
  reg = <0x0008 0x0c00 0 0x40>,/* MC portal base */
<0x 0x0834 0 0x4>; /* MC control reg */
  msi-parent = <&its>;
+/* define map for ICIDs 23-64 */
+iommu-map = <23 &smmu 23 41>;
  #address-cells = <3>;
  #size-cells = <1>;
  


___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 5/7 v6] bus/fsl-mc: support dma configure for devices on fsl-mc bus

2018-07-25 Thread Robin Murphy

On 09/07/18 12:18, Nipun Gupta wrote:

This patch adds support of dma configuration for devices on fsl-mc
bus using 'dma_configure' callback for busses. Also, directly calling
arch_setup_dma_ops is removed from the fsl-mc bus.


Reviewed-by: Robin Murphy 


Signed-off-by: Nipun Gupta 
Reviewed-by: Laurentiu Tudor 
---
  drivers/bus/fsl-mc/fsl-mc-bus.c | 15 +++
  1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/bus/fsl-mc/fsl-mc-bus.c b/drivers/bus/fsl-mc/fsl-mc-bus.c
index 5d8266c..fa43c7d 100644
--- a/drivers/bus/fsl-mc/fsl-mc-bus.c
+++ b/drivers/bus/fsl-mc/fsl-mc-bus.c
@@ -127,6 +127,16 @@ static int fsl_mc_bus_uevent(struct device *dev, struct 
kobj_uevent_env *env)
return 0;
  }
  
+static int fsl_mc_dma_configure(struct device *dev)

+{
+   struct device *dma_dev = dev;
+
+   while (dev_is_fsl_mc(dma_dev))
+   dma_dev = dma_dev->parent;
+
+   return of_dma_configure(dev, dma_dev->of_node, 0);
+}
+
  static ssize_t modalias_show(struct device *dev, struct device_attribute 
*attr,
 char *buf)
  {
@@ -148,6 +158,7 @@ struct bus_type fsl_mc_bus_type = {
.name = "fsl-mc",
.match = fsl_mc_bus_match,
.uevent = fsl_mc_bus_uevent,
+   .dma_configure  = fsl_mc_dma_configure,
.dev_groups = fsl_mc_dev_groups,
  };
  EXPORT_SYMBOL_GPL(fsl_mc_bus_type);
@@ -633,10 +644,6 @@ int fsl_mc_device_add(struct fsl_mc_obj_desc *obj_desc,
goto error_cleanup_dev;
}
  
-	/* Objects are coherent, unless 'no shareability' flag set. */

-   if (!(obj_desc->flags & FSL_MC_OBJ_FLAG_NO_MEM_SHAREABILITY))
-   arch_setup_dma_ops(&mc_dev->dev, 0, 0, NULL, true);
-
/*
 * The device-specific probe callback will get invoked by device_add()
 */


___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 4/7 v6] iommu/arm-smmu: Add support for the fsl-mc bus

2018-07-25 Thread Robin Murphy

On 09/07/18 12:18, Nipun Gupta wrote:

Implement bus specific support for the fsl-mc bus including
registering arm_smmu_ops and bus specific device add operations.


I guess this is about as neat as it can get;

Reviewed-by: Robin Murphy 


Signed-off-by: Nipun Gupta 
---
  drivers/iommu/arm-smmu.c |  7 +++
  drivers/iommu/iommu.c| 13 +
  include/linux/fsl/mc.h   |  8 
  include/linux/iommu.h|  2 ++
  4 files changed, 30 insertions(+)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index f7a96bc..a011bb6 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -52,6 +52,7 @@
  #include 
  
  #include 

+#include 
  
  #include "io-pgtable.h"

  #include "arm-smmu-regs.h"
@@ -1459,6 +1460,8 @@ static struct iommu_group *arm_smmu_device_group(struct 
device *dev)
  
  	if (dev_is_pci(dev))

group = pci_device_group(dev);
+   else if (dev_is_fsl_mc(dev))
+   group = fsl_mc_device_group(dev);
else
group = generic_device_group(dev);
  
@@ -2037,6 +2040,10 @@ static void arm_smmu_bus_init(void)

bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
}
  #endif
+#ifdef CONFIG_FSL_MC_BUS
+   if (!iommu_present(&fsl_mc_bus_type))
+   bus_set_iommu(&fsl_mc_bus_type, &arm_smmu_ops);
+#endif
  }
  
  static int arm_smmu_device_probe(struct platform_device *pdev)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index d227b86..df2f49e 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -32,6 +32,7 @@
  #include 
  #include 
  #include 
+#include 
  #include 
  
  static struct kset *iommu_group_kset;

@@ -988,6 +989,18 @@ struct iommu_group *pci_device_group(struct device *dev)
return iommu_group_alloc();
  }
  
+/* Get the IOMMU group for device on fsl-mc bus */

+struct iommu_group *fsl_mc_device_group(struct device *dev)
+{
+   struct device *cont_dev = fsl_mc_cont_dev(dev);
+   struct iommu_group *group;
+
+   group = iommu_group_get(cont_dev);
+   if (!group)
+   group = iommu_group_alloc();
+   return group;
+}
+
  /**
   * iommu_group_get_for_dev - Find or create the IOMMU group for a device
   * @dev: target device
diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h
index f27cb14..dddaca1 100644
--- a/include/linux/fsl/mc.h
+++ b/include/linux/fsl/mc.h
@@ -351,6 +351,14 @@ struct fsl_mc_io {
  #define dev_is_fsl_mc(_dev) (0)
  #endif
  
+/* Macro to check if a device is a container device */

+#define fsl_mc_is_cont_dev(_dev) (to_fsl_mc_device(_dev)->flags & \
+   FSL_MC_IS_DPRC)
+
+/* Macro to get the container device of a MC device */
+#define fsl_mc_cont_dev(_dev) (fsl_mc_is_cont_dev(_dev) ? \
+   (_dev) : (_dev)->parent)
+
  /*
   * module_fsl_mc_driver() - Helper macro for drivers that don't do
   * anything special in module init/exit.  This eliminates a lot of
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 7447b0b..209891d 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -389,6 +389,8 @@ static inline size_t iommu_map_sg(struct iommu_domain 
*domain,
  extern struct iommu_group *pci_device_group(struct device *dev);
  /* Generic device grouping function */
  extern struct iommu_group *generic_device_group(struct device *dev);
+/* FSL-MC device grouping function */
+struct iommu_group *fsl_mc_device_group(struct device *dev);
  
  /**

   * struct iommu_fwspec - per-device IOMMU instance data


___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 0/7] Stop losing firmware-set DMA masks

2018-07-25 Thread Will Deacon
On Wed, Jul 25, 2018 at 01:12:56PM +0100, Robin Murphy wrote:
> On 25/07/18 12:31, Christoph Hellwig wrote:
> >Hi Robin,
> >
> >this series looks fine to me.  Do you want me to pull this into the
> >dma-mapping tree?  I'd like to see a few more ACKs from the ACPI/OF/
> >IOMMU maintainers though.
> 
> Thanks, I was indeed assuming that the dma-mapping tree would be the best
> route to keep this lot together, hence including patch #1 even though it's
> not functionally related (having spoken to Will offline I think he's happy
> for that one to go through you rather than via arm64 this time around, even
> if #2-#7 don't make it).

Yes, that's right. Here's my ack if it helps:

Acked-by: Will Deacon 

Will
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 0/7] Stop losing firmware-set DMA masks

2018-07-25 Thread Robin Murphy

On 25/07/18 12:31, Christoph Hellwig wrote:

Hi Robin,

this series looks fine to me.  Do you want me to pull this into the
dma-mapping tree?  I'd like to see a few more ACKs from the ACPI/OF/
IOMMU maintainers though.


Thanks, I was indeed assuming that the dma-mapping tree would be the 
best route to keep this lot together, hence including patch #1 even 
though it's not functionally related (having spoken to Will offline I 
think he's happy for that one to go through you rather than via arm64 
this time around, even if #2-#7 don't make it).


Robin.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 0/7] Stop losing firmware-set DMA masks

2018-07-25 Thread Joerg Roedel
On Wed, Jul 25, 2018 at 01:31:22PM +0200, Christoph Hellwig wrote:
> Hi Robin,
> 
> this series looks fine to me.  Do you want me to pull this into the
> dma-mapping tree?  I'd like to see a few more ACKs from the ACPI/OF/
> IOMMU maintainers though.

For the IOMMU parts:

Acked-by: Joerg Roedel 
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 3/3] iommu/arm-smmu: Error out only if not enough context interrupts

2018-07-25 Thread Will Deacon
On Tue, Jul 24, 2018 at 03:09:41PM +0530, Vivek Gautam wrote:
> On 7/24/2018 2:06 PM, Will Deacon wrote:
> >On Thu, Jul 19, 2018 at 11:23:56PM +0530, Vivek Gautam wrote:
> >>diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
> >>index 7c69736a30f8..4cb53bf4f423 100644
> >>--- a/drivers/iommu/arm-smmu.c
> >>+++ b/drivers/iommu/arm-smmu.c
> >>@@ -2229,12 +2229,19 @@ static int arm_smmu_device_probe(struct 
> >>platform_device *pdev)
> >>if (err)
> >>return err;
> >>-   if (smmu->version == ARM_SMMU_V2 &&
> >>-   smmu->num_context_banks != smmu->num_context_irqs) {
> >>-   dev_err(dev,
> >>-   "found only %d context interrupt(s) but %d required\n",
> >>-   smmu->num_context_irqs, smmu->num_context_banks);
> >>-   return -ENODEV;
> >>+   if (smmu->version == ARM_SMMU_V2) {
> >>+   if (smmu->num_context_banks > smmu->num_context_irqs) {
> >>+   dev_err(dev,
> >>+ "found only %d context irq(s) but %d required\n",
> >>+ smmu->num_context_irqs, smmu->num_context_banks);
> >>+   return -ENODEV;
> >>+   } else if (smmu->num_context_banks < smmu->num_context_irqs) {
> >>+   /* lose extra context interrupts */
> >>+   dev_notice(dev,
> >>+ "found %d context irq(s) but only %d required\n",
> >>+ smmu->num_context_irqs, smmu->num_context_banks);
> >>+   smmu->num_context_irqs = smmu->num_context_banks;
> >>+   }
> >I don't see the utility in the new message. Can you simplify with the patch
> >below on top? It's a bit weird that we only decide to ignore the extra irqs
> >after calling platform_get_irq() on them, but that seems to be harmless.
> 
> Thanks. I will modify as suggested below and respin.

It's ok, I can make the change locally.

Will
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 5/6] swiotlb: share more code between map_page and map_sg

2018-07-25 Thread Christoph Hellwig
Refactor all the common code into what previously was map_single, which
is now renamed to __swiotlb_map_page.  This also improves the map_sg
error handling and diagnostics to match the map_page ones.

Signed-off-by: Christoph Hellwig 
---
 kernel/dma/swiotlb.c | 114 ---
 1 file changed, 53 insertions(+), 61 deletions(-)

diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index e1fd03cf..8ca0964ebf3a 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -593,21 +593,47 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
 /*
  * Allocates bounce buffer and returns its physical address.
  */
-static phys_addr_t
-map_single(struct device *hwdev, phys_addr_t phys, size_t size,
-  enum dma_data_direction dir, unsigned long attrs)
+static int
+__swiotlb_map_page(struct device *dev, phys_addr_t phys, size_t size,
+   enum dma_data_direction dir, unsigned long attrs,
+   dma_addr_t *dma_addr)
 {
-   dma_addr_t start_dma_addr;
-
-   if (swiotlb_force == SWIOTLB_NO_FORCE) {
-   dev_warn_ratelimited(hwdev, "Cannot do DMA to address %pa\n",
-&phys);
-   return SWIOTLB_MAP_ERROR;
+   phys_addr_t map_addr;
+
+   if (WARN_ON_ONCE(dir == DMA_NONE))
+   return -EINVAL;
+   *dma_addr = phys_to_dma(dev, phys);
+
+   switch (swiotlb_force) {
+   case SWIOTLB_NO_FORCE:
+   dev_warn_ratelimited(dev,
+   "swiotlb: force disabled for address %pa\n", &phys);
+   return -EOPNOTSUPP;
+   case SWIOTLB_NORMAL:
+   /* can we address the memory directly? */
+   if (dma_capable(dev, *dma_addr, size))
+   return 0;
+   break;
+   case SWIOTLB_FORCE:
+   break;
}
 
-   start_dma_addr = __phys_to_dma(hwdev, io_tlb_start);
-   return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size,
- dir, attrs);
+   trace_swiotlb_bounced(dev, *dma_addr, size, swiotlb_force);
+   map_addr = swiotlb_tbl_map_single(dev, __phys_to_dma(dev, io_tlb_start),
+   phys, size, dir, attrs);
+   if (unlikely(map_addr == SWIOTLB_MAP_ERROR))
+   return -ENOMEM;
+
+   /* Ensure that the address returned is DMA'ble */
+   *dma_addr = __phys_to_dma(dev, map_addr);
+   if (unlikely(!dma_capable(dev, *dma_addr, size))) {
+   dev_err_ratelimited(dev,
+   "DMA: swiotlb buffer not addressable.\n");
+   swiotlb_tbl_unmap_single(dev, map_addr, size, dir,
+   attrs | DMA_ATTR_SKIP_CPU_SYNC);
+   return -EINVAL;
+   }
+   return 0;
 }
 
 /*
@@ -773,35 +799,12 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct 
page *page,
enum dma_data_direction dir,
unsigned long attrs)
 {
-   phys_addr_t map, phys = page_to_phys(page) + offset;
-   dma_addr_t dev_addr = phys_to_dma(dev, phys);
-
-   BUG_ON(dir == DMA_NONE);
-   /*
-* If the address happens to be in the device's DMA window,
-* we can safely return the device addr and not worry about bounce
-* buffering it.
-*/
-   if (dma_capable(dev, dev_addr, size) && swiotlb_force != SWIOTLB_FORCE)
-   return dev_addr;
+   dma_addr_t dma_addr;
 
-   trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force);
-
-   /* Oh well, have to allocate and map a bounce buffer. */
-   map = map_single(dev, phys, size, dir, attrs);
-   if (map == SWIOTLB_MAP_ERROR)
+   if (unlikely(__swiotlb_map_page(dev, page_to_phys(page) + offset, size,
+   dir, attrs, &dma_addr) < 0))
return __phys_to_dma(dev, io_tlb_overflow_buffer);
-
-   dev_addr = __phys_to_dma(dev, map);
-
-   /* Ensure that the address returned is DMA'ble */
-   if (dma_capable(dev, dev_addr, size))
-   return dev_addr;
-
-   attrs |= DMA_ATTR_SKIP_CPU_SYNC;
-   swiotlb_tbl_unmap_single(dev, map, size, dir, attrs);
-
-   return __phys_to_dma(dev, io_tlb_overflow_buffer);
+   return dma_addr;
 }
 
 /*
@@ -892,37 +895,26 @@ swiotlb_sync_single_for_device(struct device *hwdev, 
dma_addr_t dev_addr,
  * same here.
  */
 int
-swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
+swiotlb_map_sg_attrs(struct device *dev, struct scatterlist *sgl, int nelems,
 enum dma_data_direction dir, unsigned long attrs)
 {
struct scatterlist *sg;
int i;
 
-   BUG_ON(dir == DMA_NONE);
-
for_each_sg(sgl, sg, nelems, i) {
-   phys_addr_t paddr = sg_phys(sg);
-   dma_addr_t dev_addr = phys_to_dma(hwdev, paddr);
-
-   if (swiotlb_force == SWIOTLB_FORCE ||
-   !dma_ca

[PATCH 4/6] swiotlb: mark is_swiotlb_buffer static

2018-07-25 Thread Christoph Hellwig
Signed-off-by: Christoph Hellwig 
---
 include/linux/swiotlb.h | 1 -
 kernel/dma/swiotlb.c| 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 965be92c33b5..7ef541ce8f34 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -121,7 +121,6 @@ static inline unsigned int swiotlb_max_segment(void) { 
return 0; }
 #endif
 
 extern void swiotlb_print_info(void);
-extern int is_swiotlb_buffer(phys_addr_t paddr);
 extern void swiotlb_set_max_segment(unsigned int);
 
 extern const struct dma_map_ops swiotlb_dma_ops;
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 03016221fc64..e1fd03cf 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -429,7 +429,7 @@ void __init swiotlb_exit(void)
max_segment = 0;
 }
 
-int is_swiotlb_buffer(phys_addr_t paddr)
+static int is_swiotlb_buffer(phys_addr_t paddr)
 {
return paddr >= io_tlb_start && paddr < io_tlb_end;
 }
-- 
2.18.0

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 6/6] swiotlb: respect DMA_ATTR_NO_WARN in __swiotlb_map_page

2018-07-25 Thread Christoph Hellwig
Signed-off-by: Christoph Hellwig 
---
 kernel/dma/swiotlb.c | 15 ++-
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 8ca0964ebf3a..5c3db7c89e0f 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -606,8 +606,11 @@ __swiotlb_map_page(struct device *dev, phys_addr_t phys, 
size_t size,
 
switch (swiotlb_force) {
case SWIOTLB_NO_FORCE:
-   dev_warn_ratelimited(dev,
-   "swiotlb: force disabled for address %pa\n", &phys);
+   if (!(attrs & DMA_ATTR_NO_WARN)) {
+   dev_warn_ratelimited(dev,
+   "swiotlb: force disabled for address %pa\n",
+   &phys);
+   }
return -EOPNOTSUPP;
case SWIOTLB_NORMAL:
/* can we address the memory directly? */
@@ -627,10 +630,12 @@ __swiotlb_map_page(struct device *dev, phys_addr_t phys, 
size_t size,
/* Ensure that the address returned is DMA'ble */
*dma_addr = __phys_to_dma(dev, map_addr);
if (unlikely(!dma_capable(dev, *dma_addr, size))) {
-   dev_err_ratelimited(dev,
-   "DMA: swiotlb buffer not addressable.\n");
+   if (!(attrs & DMA_ATTR_NO_WARN)) {
+   dev_err_ratelimited(dev,
+   "DMA: swiotlb buffer not addressable.\n");
+   }
swiotlb_tbl_unmap_single(dev, map_addr, size, dir,
-   attrs | DMA_ATTR_SKIP_CPU_SYNC);
+   attrs | DMA_ATTR_SKIP_CPU_SYNC);
return -EINVAL;
}
return 0;
-- 
2.18.0

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 3/6] swiotlb: merge swiotlb_unmap_page and unmap_single

2018-07-25 Thread Christoph Hellwig
Signed-off-by: Christoph Hellwig 
---
 kernel/dma/swiotlb.c | 15 ---
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 06cfc4d00325..03016221fc64 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -812,9 +812,9 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page 
*page,
  * After this call, reads by the cpu to the buffer are guaranteed to see
  * whatever the device wrote there.
  */
-static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
-size_t size, enum dma_data_direction dir,
-unsigned long attrs)
+void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
+   size_t size, enum dma_data_direction dir,
+   unsigned long attrs)
 {
phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
 
@@ -837,13 +837,6 @@ static void unmap_single(struct device *hwdev, dma_addr_t 
dev_addr,
dma_mark_clean(phys_to_virt(paddr), size);
 }
 
-void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
-   size_t size, enum dma_data_direction dir,
-   unsigned long attrs)
-{
-   unmap_single(hwdev, dev_addr, size, dir, attrs);
-}
-
 /*
  * Make physical memory consistent for a single streaming mode DMA translation
  * after a transfer.
@@ -947,7 +940,7 @@ swiotlb_unmap_sg_attrs(struct device *hwdev, struct 
scatterlist *sgl,
BUG_ON(dir == DMA_NONE);
 
for_each_sg(sgl, sg, nelems, i)
-   unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir,
+   swiotlb_unmap_page(hwdev, sg->dma_address, sg_dma_len(sg), dir,
 attrs);
 }
 
-- 
2.18.0

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 2/6] swiotlb: do not panic on mapping failures

2018-07-25 Thread Christoph Hellwig
All properly written drivers now have error handling in the
dma_map_single / dma_map_page callers.  As swiotlb_tbl_map_single already
prints a useful warning when running out of swiotlb pool space, we can
also remove swiotlb_full entirely as it serves no purpose now.

Signed-off-by: Christoph Hellwig 
---
 kernel/dma/swiotlb.c | 33 +
 1 file changed, 1 insertion(+), 32 deletions(-)

diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 9062b14bc7f4..06cfc4d00325 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -761,34 +761,6 @@ static bool swiotlb_free_buffer(struct device *dev, size_t 
size,
return true;
 }
 
-static void
-swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir,
-int do_panic)
-{
-   if (swiotlb_force == SWIOTLB_NO_FORCE)
-   return;
-
-   /*
-* Ran out of IOMMU space for this operation. This is very bad.
-* Unfortunately the drivers cannot handle this operation properly.
-* unless they check for dma_mapping_error (most don't)
-* When the mapping is small enough return a static buffer to limit
-* the damage, or panic when the transfer is too big.
-*/
-   dev_err_ratelimited(dev, "DMA: Out of SW-IOMMU space for %zu bytes\n",
-   size);
-
-   if (size <= io_tlb_overflow || !do_panic)
-   return;
-
-   if (dir == DMA_BIDIRECTIONAL)
-   panic("DMA: Random memory could be DMA accessed\n");
-   if (dir == DMA_FROM_DEVICE)
-   panic("DMA: Random memory could be DMA written\n");
-   if (dir == DMA_TO_DEVICE)
-   panic("DMA: Random memory could be DMA read\n");
-}
-
 /*
  * Map a single buffer of the indicated size for DMA in streaming mode.  The
  * physical address to use is returned.
@@ -817,10 +789,8 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct 
page *page,
 
/* Oh well, have to allocate and map a bounce buffer. */
map = map_single(dev, phys, size, dir, attrs);
-   if (map == SWIOTLB_MAP_ERROR) {
-   swiotlb_full(dev, size, dir, 1);
+   if (map == SWIOTLB_MAP_ERROR)
return __phys_to_dma(dev, io_tlb_overflow_buffer);
-   }
 
dev_addr = __phys_to_dma(dev, map);
 
@@ -948,7 +918,6 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct 
scatterlist *sgl, int nelems,
if (map == SWIOTLB_MAP_ERROR) {
/* Don't panic here, we expect map_sg users
   to do proper error handling. */
-   swiotlb_full(hwdev, sg->length, dir, 0);
attrs |= DMA_ATTR_SKIP_CPU_SYNC;
swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir,
   attrs);
-- 
2.18.0

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 1/6] swiotlb: remove a pointless comment

2018-07-25 Thread Christoph Hellwig
This comment describes an aspect of the map_sg interface that isn't
even exploited by swiotlb.

Signed-off-by: Christoph Hellwig 
---
 kernel/dma/swiotlb.c | 6 --
 1 file changed, 6 deletions(-)

diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 4f8a6dbf0b60..9062b14bc7f4 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -925,12 +925,6 @@ swiotlb_sync_single_for_device(struct device *hwdev, 
dma_addr_t dev_addr,
  * appropriate dma address and length.  They are obtained via
  * sg_dma_{address,length}(SG).
  *
- * NOTE: An implementation may be able to use a smaller number of
- *   DMA address/length pairs than there are SG table elements.
- *   (for example via virtual mapping capabilities)
- *   The routine returns the number of addr/length pairs actually
- *   used, at most nents.
- *
  * Device ownership issues as mentioned above for swiotlb_map_page are the
  * same here.
  */
-- 
2.18.0

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


swiotlb cleanup (resend)

2018-07-25 Thread Christoph Hellwig
Hi Konrad,

below are a few swiotlb patches.  Mostly just cleanups, but the removal
of the panic option is an actual change in (rarely used) functionality.

I'd be happy to pick them up through the dma-mapping tree if you are
fine with that.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2] dma-mapping: Relax warning for per-device areas

2018-07-25 Thread Christoph Hellwig
Thanks, I'll pull this into the dma-mapping tree.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 0/7] Stop losing firmware-set DMA masks

2018-07-25 Thread Christoph Hellwig
Hi Robin,

this series looks fine to me.  Do you want me to pull this into the
dma-mapping tree?  I'd like to see a few more ACKs from the ACPI/OF/
IOMMU maintainers though.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 4/7] of/device: Set bus DMA mask as appropriate

2018-07-25 Thread Christoph Hellwig
On Mon, Jul 23, 2018 at 11:16:09PM +0100, Robin Murphy wrote:
> When an explicit DMA limit is described by firmware, we need to remember
> it regardless of how drivers might subsequently update their devices'
> masks. The new bus_dma_mask field does that.

Looks good,

Reviewed-by: Christoph Hellwig 
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 3/7] ACPI/IORT: Set bus DMA mask as appropriate

2018-07-25 Thread Christoph Hellwig
On Mon, Jul 23, 2018 at 11:16:08PM +0100, Robin Murphy wrote:
> When an explicit DMA limit is described by firmware, we need to remember
> it regardless of how drivers might subsequently update their devices'
> masks. The new bus_dma_mask field does that.

Looks good,

Reviewed-by: Christoph Hellwig 
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 2/7] dma-mapping: Generalise dma_32bit_limit flag

2018-07-25 Thread Christoph Hellwig
On Mon, Jul 23, 2018 at 11:16:07PM +0100, Robin Murphy wrote:
> Whilst the notion of an upstream DMA restriction is most commonly seen
> in PCI host bridges saddled with a 32-bit native interface, a more
> general version of the same issue can exist on complex SoCs where a bus
> or point-to-point interconnect link from a device's DMA master interface
> to another component along the path to memory (often an IOMMU) may carry
> fewer address bits than the interfaces at both ends nominally support.
> In order to properly deal with this, the first step is to expand the
> dma_32bit_limit flag into an arbitrary mask.
> 
> To minimise the impact on existing code, we'll make sure to only
> consider this new mask valid if set. That makes sense anyway, since a
> mask of zero would represent DMA not being wired up at all, and that
> would be better handled by not providing valid ops in the first place.
> 
> Signed-off-by: Robin Murphy 

Looks good,

Reviewed-by: Christoph Hellwig 
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 5/5] sh: use generic dma_noncoherent_ops

2018-07-25 Thread Christoph Hellwig
Switch to the generic noncoherent direct mapping implementation.

Signed-off-by: Christoph Hellwig 
---
 arch/sh/Kconfig   |  3 +-
 arch/sh/include/asm/Kbuild|  1 +
 arch/sh/include/asm/dma-mapping.h | 26 ---
 arch/sh/kernel/Makefile   |  2 +-
 arch/sh/kernel/dma-coherent.c | 23 +
 arch/sh/kernel/dma-nommu.c| 78 ---
 6 files changed, 15 insertions(+), 118 deletions(-)
 delete mode 100644 arch/sh/include/asm/dma-mapping.h
 delete mode 100644 arch/sh/kernel/dma-nommu.c

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index c9993a0cdc7e..da4db4b5359f 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -51,7 +51,6 @@ config SUPERH
select HAVE_ARCH_AUDITSYSCALL
select HAVE_FUTEX_CMPXCHG if FUTEX
select HAVE_NMI
-   select NEED_DMA_MAP_STATE
select NEED_SG_DMA_LENGTH
 
help
@@ -164,6 +163,8 @@ config DMA_COHERENT
 
 config DMA_NONCOHERENT
def_bool !DMA_COHERENT
+   select ARCH_HAS_SYNC_DMA_FOR_DEVICE
+   select DMA_NONCOHERENT_OPS
 
 config PGTABLE_LEVELS
default 3 if X2TLB
diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild
index 46dd82ab2c29..6a5609a55965 100644
--- a/arch/sh/include/asm/Kbuild
+++ b/arch/sh/include/asm/Kbuild
@@ -2,6 +2,7 @@ generic-y += compat.h
 generic-y += current.h
 generic-y += delay.h
 generic-y += div64.h
+generic-y += dma-mapping.h
 generic-y += emergency-restart.h
 generic-y += exec.h
 generic-y += irq_regs.h
diff --git a/arch/sh/include/asm/dma-mapping.h 
b/arch/sh/include/asm/dma-mapping.h
deleted file mode 100644
index 1ebc6a4eb1c5..
--- a/arch/sh/include/asm/dma-mapping.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __ASM_SH_DMA_MAPPING_H
-#define __ASM_SH_DMA_MAPPING_H
-
-extern const struct dma_map_ops nommu_dma_ops;
-
-static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
-{
-#ifdef CONFIG_DMA_NONCOHERENT
-   return &nommu_dma_ops;
-#else
-   return &dma_direct_ops;
-#endif
-}
-
-extern void *dma_generic_alloc_coherent(struct device *dev, size_t size,
-   dma_addr_t *dma_addr, gfp_t flag,
-   unsigned long attrs);
-extern void dma_generic_free_coherent(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dma_handle,
- unsigned long attrs);
-
-void sh_sync_dma_for_device(void *vaddr, size_t size,
-   enum dma_data_direction dir);
-
-#endif /* __ASM_SH_DMA_MAPPING_H */
diff --git a/arch/sh/kernel/Makefile b/arch/sh/kernel/Makefile
index d5ddb64bfffe..59673f8a3379 100644
--- a/arch/sh/kernel/Makefile
+++ b/arch/sh/kernel/Makefile
@@ -45,7 +45,7 @@ obj-$(CONFIG_DUMP_CODE)   += disassemble.o
 obj-$(CONFIG_HIBERNATION)  += swsusp.o
 obj-$(CONFIG_DWARF_UNWINDER)   += dwarf.o
 obj-$(CONFIG_PERF_EVENTS)  += perf_event.o perf_callchain.o
-obj-$(CONFIG_DMA_NONCOHERENT)  += dma-nommu.o dma-coherent.o
+obj-$(CONFIG_DMA_NONCOHERENT)  += dma-coherent.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT)   += hw_breakpoint.o
 
 ccflags-y := -Werror
diff --git a/arch/sh/kernel/dma-coherent.c b/arch/sh/kernel/dma-coherent.c
index 2518065d5d27..a0021eef956b 100644
--- a/arch/sh/kernel/dma-coherent.c
+++ b/arch/sh/kernel/dma-coherent.c
@@ -7,14 +7,13 @@
  */
 #include 
 #include 
-#include 
+#include 
 #include 
 #include 
 #include 
 
-void *dma_generic_alloc_coherent(struct device *dev, size_t size,
-dma_addr_t *dma_handle, gfp_t gfp,
-unsigned long attrs)
+void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
+   gfp_t gfp, unsigned long attrs)
 {
void *ret, *ret_nocache;
int order = get_order(size);
@@ -29,7 +28,8 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t 
size,
 * Pages from the page allocator may have data present in
 * cache. So flush the cache before using uncached memory.
 */
-   sh_sync_dma_for_device(ret, size, DMA_BIDIRECTIONAL);
+   arch_sync_dma_for_device(dev, virt_to_phys(ret), size,
+   DMA_BIDIRECTIONAL);
 
ret_nocache = (void __force *)ioremap_nocache(virt_to_phys(ret), size);
if (!ret_nocache) {
@@ -46,9 +46,8 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t 
size,
return ret_nocache;
 }
 
-void dma_generic_free_coherent(struct device *dev, size_t size,
-  void *vaddr, dma_addr_t dma_handle,
-  unsigned long attrs)
+void arch_dma_free(struct device *dev, size_t size, void *vaddr,
+   dma_addr_t dma_handle, unsigned long attrs)
 {
int order = get_order(size);
unsigned long pfn = (dma_handle >> PAGE_SHIFT);
@@ -63,12 +62,12 @@ void dma_generic_free_cohere

[PATCH 4/5] sh: split arch/sh/mm/consistent.c

2018-07-25 Thread Christoph Hellwig
Half of the file just contains platform device memory setup code which
is required for all builds, and half contains helpers for dma coherent
allocation, which is only needed if CONFIG_DMA_NONCOHERENT is enabled.

Signed-off-by: Christoph Hellwig 
---
 arch/sh/kernel/Makefile   |  2 +-
 arch/sh/kernel/dma-coherent.c | 84 +++
 arch/sh/mm/consistent.c   | 80 -
 3 files changed, 85 insertions(+), 81 deletions(-)
 create mode 100644 arch/sh/kernel/dma-coherent.c

diff --git a/arch/sh/kernel/Makefile b/arch/sh/kernel/Makefile
index cb5f1bfb52de..d5ddb64bfffe 100644
--- a/arch/sh/kernel/Makefile
+++ b/arch/sh/kernel/Makefile
@@ -45,7 +45,7 @@ obj-$(CONFIG_DUMP_CODE)   += disassemble.o
 obj-$(CONFIG_HIBERNATION)  += swsusp.o
 obj-$(CONFIG_DWARF_UNWINDER)   += dwarf.o
 obj-$(CONFIG_PERF_EVENTS)  += perf_event.o perf_callchain.o
-obj-$(CONFIG_DMA_NONCOHERENT)  += dma-nommu.o
+obj-$(CONFIG_DMA_NONCOHERENT)  += dma-nommu.o dma-coherent.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT)   += hw_breakpoint.o
 
 ccflags-y := -Werror
diff --git a/arch/sh/kernel/dma-coherent.c b/arch/sh/kernel/dma-coherent.c
new file mode 100644
index ..2518065d5d27
--- /dev/null
+++ b/arch/sh/kernel/dma-coherent.c
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2004 - 2007  Paul Mundt
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+void *dma_generic_alloc_coherent(struct device *dev, size_t size,
+dma_addr_t *dma_handle, gfp_t gfp,
+unsigned long attrs)
+{
+   void *ret, *ret_nocache;
+   int order = get_order(size);
+
+   gfp |= __GFP_ZERO;
+
+   ret = (void *)__get_free_pages(gfp, order);
+   if (!ret)
+   return NULL;
+
+   /*
+* Pages from the page allocator may have data present in
+* cache. So flush the cache before using uncached memory.
+*/
+   sh_sync_dma_for_device(ret, size, DMA_BIDIRECTIONAL);
+
+   ret_nocache = (void __force *)ioremap_nocache(virt_to_phys(ret), size);
+   if (!ret_nocache) {
+   free_pages((unsigned long)ret, order);
+   return NULL;
+   }
+
+   split_page(pfn_to_page(virt_to_phys(ret) >> PAGE_SHIFT), order);
+
+   *dma_handle = virt_to_phys(ret);
+   if (!WARN_ON(!dev))
+   *dma_handle -= PFN_PHYS(dev->dma_pfn_offset);
+
+   return ret_nocache;
+}
+
+void dma_generic_free_coherent(struct device *dev, size_t size,
+  void *vaddr, dma_addr_t dma_handle,
+  unsigned long attrs)
+{
+   int order = get_order(size);
+   unsigned long pfn = (dma_handle >> PAGE_SHIFT);
+   int k;
+
+   if (!WARN_ON(!dev))
+   pfn += dev->dma_pfn_offset;
+
+   for (k = 0; k < (1 << order); k++)
+   __free_pages(pfn_to_page(pfn + k), 0);
+
+   iounmap(vaddr);
+}
+
+void sh_sync_dma_for_device(void *vaddr, size_t size,
+   enum dma_data_direction direction)
+{
+   void *addr = sh_cacheop_vaddr(vaddr);
+
+   switch (direction) {
+   case DMA_FROM_DEVICE:   /* invalidate only */
+   __flush_invalidate_region(addr, size);
+   break;
+   case DMA_TO_DEVICE: /* writeback only */
+   __flush_wback_region(addr, size);
+   break;
+   case DMA_BIDIRECTIONAL: /* writeback and invalidate */
+   __flush_purge_region(addr, size);
+   break;
+   default:
+   BUG();
+   }
+}
diff --git a/arch/sh/mm/consistent.c b/arch/sh/mm/consistent.c
index 1622ae6b9dbd..792f36129062 100644
--- a/arch/sh/mm/consistent.c
+++ b/arch/sh/mm/consistent.c
@@ -1,10 +1,6 @@
 /*
- * arch/sh/mm/consistent.c
- *
  * Copyright (C) 2004 - 2007  Paul Mundt
  *
- * Declared coherent memory functions based on arch/x86/kernel/pci-dma_32.c
- *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
@@ -13,83 +9,7 @@
 #include 
 #include 
 #include 
-#include 
 #include 
-#include 
-#include 
-#include 
-#include 
-
-void *dma_generic_alloc_coherent(struct device *dev, size_t size,
-dma_addr_t *dma_handle, gfp_t gfp,
-unsigned long attrs)
-{
-   void *ret, *ret_nocache;
-   int order = get_order(size);
-
-   gfp |= __GFP_ZERO;
-
-   ret = (void *)__get_free_pages(gfp, order);
-   if (!ret)
-   return NULL;
-
-   /*
-* Pages from the page allocator may have data present in
-* cache. So flush the cache before using u

[PATCH 3/5] sh: use dma_direct_ops for the CONFIG_DMA_COHERENT case

2018-07-25 Thread Christoph Hellwig
This is a slight change in behavior as we avoid the detour through the
virtual mapping for the coherent allocator, but if this CPU really is
coherent that should be the right thing to do.

Signed-off-by: Christoph Hellwig 
---
 arch/sh/Kconfig   | 1 +
 arch/sh/include/asm/dma-mapping.h | 4 
 arch/sh/kernel/Makefile   | 4 ++--
 arch/sh/kernel/dma-nommu.c| 4 
 4 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index dd4f3d3e644f..c9993a0cdc7e 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -159,6 +159,7 @@ config SWAP_IO_SPACE
bool
 
 config DMA_COHERENT
+   select DMA_DIRECT_OPS
bool
 
 config DMA_NONCOHERENT
diff --git a/arch/sh/include/asm/dma-mapping.h 
b/arch/sh/include/asm/dma-mapping.h
index 149e71f95be7..1ebc6a4eb1c5 100644
--- a/arch/sh/include/asm/dma-mapping.h
+++ b/arch/sh/include/asm/dma-mapping.h
@@ -6,7 +6,11 @@ extern const struct dma_map_ops nommu_dma_ops;
 
 static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
 {
+#ifdef CONFIG_DMA_NONCOHERENT
return &nommu_dma_ops;
+#else
+   return &dma_direct_ops;
+#endif
 }
 
 extern void *dma_generic_alloc_coherent(struct device *dev, size_t size,
diff --git a/arch/sh/kernel/Makefile b/arch/sh/kernel/Makefile
index dc80041f7363..cb5f1bfb52de 100644
--- a/arch/sh/kernel/Makefile
+++ b/arch/sh/kernel/Makefile
@@ -12,7 +12,7 @@ endif
 
 CFLAGS_REMOVE_return_address.o = -pg
 
-obj-y  := debugtraps.o dma-nommu.o dumpstack.o \
+obj-y  := debugtraps.o dumpstack.o \
   idle.o io.o irq.o irq_$(BITS).o kdebugfs.o   \
   machvec.o nmi_debug.o process.o  \
   process_$(BITS).o ptrace.o ptrace_$(BITS).o  \
@@ -45,7 +45,7 @@ obj-$(CONFIG_DUMP_CODE)   += disassemble.o
 obj-$(CONFIG_HIBERNATION)  += swsusp.o
 obj-$(CONFIG_DWARF_UNWINDER)   += dwarf.o
 obj-$(CONFIG_PERF_EVENTS)  += perf_event.o perf_callchain.o
-
+obj-$(CONFIG_DMA_NONCOHERENT)  += dma-nommu.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT)   += hw_breakpoint.o
 
 ccflags-y := -Werror
diff --git a/arch/sh/kernel/dma-nommu.c b/arch/sh/kernel/dma-nommu.c
index 79a9edafa5b0..d8689b1cb743 100644
--- a/arch/sh/kernel/dma-nommu.c
+++ b/arch/sh/kernel/dma-nommu.c
@@ -51,7 +51,6 @@ static int nommu_map_sg(struct device *dev, struct 
scatterlist *sg,
return nents;
 }
 
-#ifdef CONFIG_DMA_NONCOHERENT
 static void nommu_sync_single_for_device(struct device *dev, dma_addr_t addr,
  size_t size, enum dma_data_direction dir)
 {
@@ -67,16 +66,13 @@ static void nommu_sync_sg_for_device(struct device *dev, 
struct scatterlist *sg,
for_each_sg(sg, s, nelems, i)
sh_sync_dma_for_device(sg_virt(s), s->length, dir);
 }
-#endif
 
 const struct dma_map_ops nommu_dma_ops = {
.alloc  = dma_generic_alloc_coherent,
.free   = dma_generic_free_coherent,
.map_page   = nommu_map_page,
.map_sg = nommu_map_sg,
-#ifdef CONFIG_DMA_NONCOHERENT
.sync_single_for_device = nommu_sync_single_for_device,
.sync_sg_for_device = nommu_sync_sg_for_device,
-#endif
 };
 EXPORT_SYMBOL(nommu_dma_ops);
-- 
2.18.0

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 2/5] sh: introduce a sh_cacheop_vaddr helper

2018-07-25 Thread Christoph Hellwig
And use it in the maple bus code to avoid a DMA API dependency.

Signed-off-by: Christoph Hellwig 
---
 arch/sh/include/asm/cacheflush.h | 7 +++
 arch/sh/mm/consistent.c  | 6 +-
 drivers/sh/maple/maple.c | 7 ---
 3 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/arch/sh/include/asm/cacheflush.h b/arch/sh/include/asm/cacheflush.h
index d103ab5a4e4b..b932e42ef028 100644
--- a/arch/sh/include/asm/cacheflush.h
+++ b/arch/sh/include/asm/cacheflush.h
@@ -101,5 +101,12 @@ void kunmap_coherent(void *kvaddr);
 
 void cpu_cache_init(void);
 
+static inline void *sh_cacheop_vaddr(void *vaddr)
+{
+   if (__in_29bit_mode())
+   vaddr = (void *)CAC_ADDR((unsigned long)vaddr);
+   return vaddr;
+}
+
 #endif /* __KERNEL__ */
 #endif /* __ASM_SH_CACHEFLUSH_H */
diff --git a/arch/sh/mm/consistent.c b/arch/sh/mm/consistent.c
index e9d422bd42a5..1622ae6b9dbd 100644
--- a/arch/sh/mm/consistent.c
+++ b/arch/sh/mm/consistent.c
@@ -74,10 +74,7 @@ void dma_generic_free_coherent(struct device *dev, size_t 
size,
 void sh_sync_dma_for_device(void *vaddr, size_t size,
enum dma_data_direction direction)
 {
-   void *addr;
-
-   addr = __in_29bit_mode() ?
-  (void *)CAC_ADDR((unsigned long)vaddr) : vaddr;
+   void *addr = sh_cacheop_vaddr(vaddr);
 
switch (direction) {
case DMA_FROM_DEVICE:   /* invalidate only */
@@ -93,7 +90,6 @@ void sh_sync_dma_for_device(void *vaddr, size_t size,
BUG();
}
 }
-EXPORT_SYMBOL(sh_sync_dma_for_device);
 
 static int __init memchunk_setup(char *str)
 {
diff --git a/drivers/sh/maple/maple.c b/drivers/sh/maple/maple.c
index 2e45988d1259..e5d7fb81ad66 100644
--- a/drivers/sh/maple/maple.c
+++ b/drivers/sh/maple/maple.c
@@ -300,8 +300,8 @@ static void maple_send(void)
mutex_unlock(&maple_wlist_lock);
if (maple_packets > 0) {
for (i = 0; i < (1 << MAPLE_DMA_PAGES); i++)
-   sh_sync_dma_for_device(maple_sendbuf + i * PAGE_SIZE,
-  PAGE_SIZE, DMA_BIDIRECTIONAL);
+   __flush_purge_region(maple_sendbuf + i * PAGE_SIZE,
+   PAGE_SIZE);
}
 
 finish:
@@ -642,7 +642,8 @@ static void maple_dma_handler(struct work_struct *work)
list_for_each_entry_safe(mq, nmq, &maple_sentq, list) {
mdev = mq->dev;
recvbuf = mq->recvbuf->buf;
-   sh_sync_dma_for_device(recvbuf, 0x400, DMA_FROM_DEVICE);
+   __flush_invalidate_region(sh_cacheop_vaddr(recvbuf),
+   0x400);
code = recvbuf[0];
kfree(mq->sendbuf);
list_del_init(&mq->list);
-- 
2.18.0

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 1/5] sh: simplify get_arch_dma_ops

2018-07-25 Thread Christoph Hellwig
Remove the indirection through the dma_ops variable, and just return
nommu_dma_ops directly from get_arch_dma_ops.

Signed-off-by: Christoph Hellwig 
---
 arch/sh/include/asm/dma-mapping.h |  5 ++---
 arch/sh/kernel/dma-nommu.c|  8 +---
 arch/sh/mm/consistent.c   |  3 ---
 arch/sh/mm/init.c | 10 --
 4 files changed, 3 insertions(+), 23 deletions(-)

diff --git a/arch/sh/include/asm/dma-mapping.h 
b/arch/sh/include/asm/dma-mapping.h
index 41167931e5d9..149e71f95be7 100644
--- a/arch/sh/include/asm/dma-mapping.h
+++ b/arch/sh/include/asm/dma-mapping.h
@@ -2,12 +2,11 @@
 #ifndef __ASM_SH_DMA_MAPPING_H
 #define __ASM_SH_DMA_MAPPING_H
 
-extern const struct dma_map_ops *dma_ops;
-extern void no_iommu_init(void);
+extern const struct dma_map_ops nommu_dma_ops;
 
 static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
 {
-   return dma_ops;
+   return &nommu_dma_ops;
 }
 
 extern void *dma_generic_alloc_coherent(struct device *dev, size_t size,
diff --git a/arch/sh/kernel/dma-nommu.c b/arch/sh/kernel/dma-nommu.c
index 3e3a32fc676e..79a9edafa5b0 100644
--- a/arch/sh/kernel/dma-nommu.c
+++ b/arch/sh/kernel/dma-nommu.c
@@ -79,10 +79,4 @@ const struct dma_map_ops nommu_dma_ops = {
.sync_sg_for_device = nommu_sync_sg_for_device,
 #endif
 };
-
-void __init no_iommu_init(void)
-{
-   if (dma_ops)
-   return;
-   dma_ops = &nommu_dma_ops;
-}
+EXPORT_SYMBOL(nommu_dma_ops);
diff --git a/arch/sh/mm/consistent.c b/arch/sh/mm/consistent.c
index fceb2adfcac7..e9d422bd42a5 100644
--- a/arch/sh/mm/consistent.c
+++ b/arch/sh/mm/consistent.c
@@ -20,9 +20,6 @@
 #include 
 #include 
 
-const struct dma_map_ops *dma_ops;
-EXPORT_SYMBOL(dma_ops);
-
 void *dma_generic_alloc_coherent(struct device *dev, size_t size,
 dma_addr_t *dma_handle, gfp_t gfp,
 unsigned long attrs)
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index 4034035fbede..7713c084d040 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -339,22 +339,12 @@ void __init paging_init(void)
free_area_init_nodes(max_zone_pfns);
 }
 
-/*
- * Early initialization for any I/O MMUs we might have.
- */
-static void __init iommu_init(void)
-{
-   no_iommu_init();
-}
-
 unsigned int mem_init_done = 0;
 
 void __init mem_init(void)
 {
pg_data_t *pgdat;
 
-   iommu_init();
-
high_memory = NULL;
for_each_online_pgdat(pgdat)
high_memory = max_t(void *, high_memory,
-- 
2.18.0

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


use the generic dma-noncoherent code for sh V3

2018-07-25 Thread Christoph Hellwig
Hi all,

can you review these patches to switch sh to use the generic
dma-noncoherent code?  All the requirements are in mainline already,
and various other architectures have already been switched over to it.

Changes since V2:
 - drop a now obsolete export

Changes since V1:
 - fixed two stupid compile errors and verified them using a local
   cross toolchain instead of the 0day buildbot
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu