Re: [RFC 02/11] coresight: etm-perf: Allow an event to use different sinks

2020-11-12 Thread Linu Cherian
ecks for the first sink */
> > + if (!sink) {
> > + sink = new_sink;
> > + } else if (!sinks_match(new_sink, sink)) {
> > + cpumask_clear_cpu(cpu, mask);
> > + continue;
> > + }
> > + } else {
> > + new_sink = sink;
> > + }
> >
> >   /*
> >* Building a path doesn't enable it, it simply builds a
> >* list of devices from source to sink that can be
> >* referenced later when the path is actually needed.
> >*/
> > - path = coresight_build_path(csdev, sink);
> > + path = coresight_build_path(csdev, new_sink);
> >   if (IS_ERR(path)) {
> >   cpumask_clear_cpu(cpu, mask);
> >   continue;
> > @@ -284,7 +307,12 @@ static void *etm_setup_aux(struct perf_event *event, 
> > void **pages,
> >   if (!sink_ops(sink)->alloc_buffer || !sink_ops(sink)->free_buffer)
> >   goto err;
> >
> > - /* Allocate the sink buffer for this session */
> > + /*
> > +  * Allocate the sink buffer for this session. All the sinks
> > +  * where this event can be scheduled are ensured to be of the
> > +  * same type. Thus the same sink configuration is used by the
> > +  * sinks.
> > +  */
> >   event_data->snk_config =
> >   sink_ops(sink)->alloc_buffer(sink, event, pages,
> >nr_pages, overwrite);
> >
>

Perf record and report worked fine with this as well, with formatting
related opencsd hacks.

Tested-by : Linu Cherian 

Thanks.


Re: [PATCH AUTOSEL 5.9 075/147] coresight: Make sysfs functional on topologies with per core sink

2020-11-01 Thread Linu Cherian
Hi,

Upstream commit,

commit bb1860efc817c18fce4112f25f51043e44346d1b
Author: Linu Cherian 
Date:   Wed Sep 16 13:17:34 2020 -0600




coresight: etm: perf: Sink selection using sysfs is deprecated


need to go along with this, else there will be build breakage.
This applies for 5.4, 5.8 and 5.9

Mathieu, could you please ACK ?

Please let me know if i need to send the patch to
sta...@vger.kernel.org separately.
Thanks.




On Tue, Oct 27, 2020 at 5:20 AM Sasha Levin  wrote:
>
> From: Linu Cherian 
>
> [ Upstream commit 6d578258b955fce1bbd9a8fefe7b10065a84 ]
>
> Coresight driver assumes sink is common across all the ETMs,
> and tries to build a path between ETM and the first enabled
> sink found using bus based search. This breaks sysFS usage
> on implementations that has multiple per core sinks in
> enabled state.
>
> To fix this, coresight_get_enabled_sink API is updated to
> do a connection based search starting from the given source,
> instead of bus based search.
> With sink selection using sysfs depecrated for perf interface,
> provision for reset is removed as well in this API.
>
> Signed-off-by: Linu Cherian 
> [Fixed indentation problem and removed obsolete comment]
> Signed-off-by: Mathieu Poirier 
> Link: 
> https://lore.kernel.org/r/20200916191737.4001561-15-mathieu.poir...@linaro.org
> Signed-off-by: Greg Kroah-Hartman 
> Signed-off-by: Sasha Levin 
> ---
>  drivers/hwtracing/coresight/coresight-priv.h |  3 +-
>  drivers/hwtracing/coresight/coresight.c  | 62 +---
>  2 files changed, 29 insertions(+), 36 deletions(-)
>
> diff --git a/drivers/hwtracing/coresight/coresight-priv.h 
> b/drivers/hwtracing/coresight/coresight-priv.h
> index f2dc625ea5856..5fe773c4d6cc5 100644
> --- a/drivers/hwtracing/coresight/coresight-priv.h
> +++ b/drivers/hwtracing/coresight/coresight-priv.h
> @@ -148,7 +148,8 @@ static inline void coresight_write_reg_pair(void __iomem 
> *addr, u64 val,
>  void coresight_disable_path(struct list_head *path);
>  int coresight_enable_path(struct list_head *path, u32 mode, void *sink_data);
>  struct coresight_device *coresight_get_sink(struct list_head *path);
> -struct coresight_device *coresight_get_enabled_sink(bool reset);
> +struct coresight_device *
> +coresight_get_enabled_sink(struct coresight_device *source);
>  struct coresight_device *coresight_get_sink_by_id(u32 id);
>  struct coresight_device *
>  coresight_find_default_sink(struct coresight_device *csdev);
> diff --git a/drivers/hwtracing/coresight/coresight.c 
> b/drivers/hwtracing/coresight/coresight.c
> index e9c90f2de34ac..bb4f9e0a5438d 100644
> --- a/drivers/hwtracing/coresight/coresight.c
> +++ b/drivers/hwtracing/coresight/coresight.c
> @@ -540,50 +540,46 @@ struct coresight_device *coresight_get_sink(struct 
> list_head *path)
> return csdev;
>  }
>
> -static int coresight_enabled_sink(struct device *dev, const void *data)
> +static struct coresight_device *
> +coresight_find_enabled_sink(struct coresight_device *csdev)
>  {
> -   const bool *reset = data;
> -   struct coresight_device *csdev = to_coresight_device(dev);
> +   int i;
> +   struct coresight_device *sink;
>
> if ((csdev->type == CORESIGHT_DEV_TYPE_SINK ||
>  csdev->type == CORESIGHT_DEV_TYPE_LINKSINK) &&
> -csdev->activated) {
> -   /*
> -* Now that we have a handle on the sink for this session,
> -* disable the sysFS "enable_sink" flag so that possible
> -* concurrent perf session that wish to use another sink don't
> -* trip on it.  Doing so has no ramification for the current
> -* session.
> -*/
> -   if (*reset)
> -   csdev->activated = false;
> +csdev->activated)
> +   return csdev;
>
> -   return 1;
> +   /*
> +* Recursively explore each port found on this element.
> +*/
> +   for (i = 0; i < csdev->pdata->nr_outport; i++) {
> +   struct coresight_device *child_dev;
> +
> +   child_dev = csdev->pdata->conns[i].child_dev;
> +   if (child_dev)
> +   sink = coresight_find_enabled_sink(child_dev);
> +   if (sink)
> +   return sink;
> }
>
> -   return 0;
> +   return NULL;
>  }
>
>  /**
> - * coresight_get_enabled_sink - returns the first enabled sink found on the 
> bus
> - * @deactivate:Whether the 'enable_sink' flag should be reset
> - *
> - * When operated from perf the deactivate parameter should be s

Re: Handling active DMA during a VFIO application crash

2018-02-15 Thread Linu Cherian
Hi Alex,

On Thu Feb 15, 2018 at 09:21:09AM -0700, Alex Williamson wrote:
> On Thu, 15 Feb 2018 16:34:06 +0530
> Linu Cherian <linuc.dec...@gmail.com> wrote:
> 
> > Hi,
> > 
> > Was exploring the implications of an application crash while DMA 
> > is active from a vfio PCI device; the DMA being configured and 
> > started by the application using vfio APIs.   
> > 
> > The expectation is that, DMA is stopped/reset before we tear down the IOMMU 
> > mappings 
> > and finally free the mmapped pages(on which DMA is happening).  
> >   
> > 
> > From the below stack trace(with dump_stack in vfio_pci_release),
> >  
> > [  201.564273] [] vfio_pci_release+0x80/0x458
> > [  201.564276] [] vfio_device_fops_release+0x2c/0x50
> > [  201.564279] [] __fput+0x9c/0x218
> > [  201.564283] [] fput+0x20/0x30
> > [  201.564286] [] task_work_run+0xa0/0xc8
> > [  201.564289] [] do_exit+0x2bc/0x9c8
> > [  201.564293] [] do_group_exit+0x3c/0xa8
> > [  201.564296] [] get_signal+0x3e4/0x538
> > [  201.564299] [] do_signal+0x70/0x660
> > [  201.564302] [] do_notify_resume+0xe0/0x120
> > 
> >
> > 
> > PCI device is disabled/reset from vfio_pci_release invoked as part of 
> > device fd release. The fd releases are in turn invoked from exit_files
> > and exit_task_work.
> > 
> > But exit_mm, gets called before exit_files/exit_task_work in do_exit.
> > 
> >
> > Assuming all pages allocated/mmaped to a process gets freed in exit_mm, 
> > 
> >   
> > is there is a possibility that user pages configured for DMA can get freed 
> > to kernel before the vfio device is stopped/reset ? 
> 
> Pages mapped through the IOMMU are still pinned, so they have an
> elevated reference count and I believe therefore cannot "get freed to
> kernel".  Nothing should therefore be able to allocate those pages
> until the container is released, which happens even after the device is
> released.  Thanks,
> 
> Alex


Thanks for the clarification. I will dig through the code on this.

-- 
Linu cherian


Re: Handling active DMA during a VFIO application crash

2018-02-15 Thread Linu Cherian
Hi Alex,

On Thu Feb 15, 2018 at 09:21:09AM -0700, Alex Williamson wrote:
> On Thu, 15 Feb 2018 16:34:06 +0530
> Linu Cherian  wrote:
> 
> > Hi,
> > 
> > Was exploring the implications of an application crash while DMA 
> > is active from a vfio PCI device; the DMA being configured and 
> > started by the application using vfio APIs.   
> > 
> > The expectation is that, DMA is stopped/reset before we tear down the IOMMU 
> > mappings 
> > and finally free the mmapped pages(on which DMA is happening).  
> >   
> > 
> > From the below stack trace(with dump_stack in vfio_pci_release),
> >  
> > [  201.564273] [] vfio_pci_release+0x80/0x458
> > [  201.564276] [] vfio_device_fops_release+0x2c/0x50
> > [  201.564279] [] __fput+0x9c/0x218
> > [  201.564283] [] fput+0x20/0x30
> > [  201.564286] [] task_work_run+0xa0/0xc8
> > [  201.564289] [] do_exit+0x2bc/0x9c8
> > [  201.564293] [] do_group_exit+0x3c/0xa8
> > [  201.564296] [] get_signal+0x3e4/0x538
> > [  201.564299] [] do_signal+0x70/0x660
> > [  201.564302] [] do_notify_resume+0xe0/0x120
> > 
> >
> > 
> > PCI device is disabled/reset from vfio_pci_release invoked as part of 
> > device fd release. The fd releases are in turn invoked from exit_files
> > and exit_task_work.
> > 
> > But exit_mm, gets called before exit_files/exit_task_work in do_exit.
> > 
> >
> > Assuming all pages allocated/mmaped to a process gets freed in exit_mm, 
> > 
> >   
> > is there is a possibility that user pages configured for DMA can get freed 
> > to kernel before the vfio device is stopped/reset ? 
> 
> Pages mapped through the IOMMU are still pinned, so they have an
> elevated reference count and I believe therefore cannot "get freed to
> kernel".  Nothing should therefore be able to allocate those pages
> until the container is released, which happens even after the device is
> released.  Thanks,
> 
> Alex


Thanks for the clarification. I will dig through the code on this.

-- 
Linu cherian


Handling active DMA during a VFIO application crash

2018-02-15 Thread Linu Cherian
Hi,

Was exploring the implications of an application crash while DMA 
is active from a vfio PCI device; the DMA being configured and 
started by the application using vfio APIs.   

The expectation is that, DMA is stopped/reset before we tear down the IOMMU 
mappings 
and finally free the mmapped pages(on which DMA is happening).  
  

>From the below stack trace(with dump_stack in vfio_pci_release), 
[  201.564273] [] vfio_pci_release+0x80/0x458
[  201.564276] [] vfio_device_fops_release+0x2c/0x50
[  201.564279] [] __fput+0x9c/0x218
[  201.564283] [] fput+0x20/0x30
[  201.564286] [] task_work_run+0xa0/0xc8
[  201.564289] [] do_exit+0x2bc/0x9c8
[  201.564293] [] do_group_exit+0x3c/0xa8
[  201.564296] [] get_signal+0x3e4/0x538
[  201.564299] [] do_signal+0x70/0x660
[  201.564302] [] do_notify_resume+0xe0/0x120

   

PCI device is disabled/reset from vfio_pci_release invoked as part of 
device fd release. The fd releases are in turn invoked from exit_files
and exit_task_work.

But exit_mm, gets called before exit_files/exit_task_work in do_exit.

   
Assuming all pages allocated/mmaped to a process gets freed in exit_mm, 

  
is there is a possibility that user pages configured for DMA can get freed 
to kernel before the vfio device is stopped/reset ? 

Thanks.

-- 
Linu cherian


Handling active DMA during a VFIO application crash

2018-02-15 Thread Linu Cherian
Hi,

Was exploring the implications of an application crash while DMA 
is active from a vfio PCI device; the DMA being configured and 
started by the application using vfio APIs.   

The expectation is that, DMA is stopped/reset before we tear down the IOMMU 
mappings 
and finally free the mmapped pages(on which DMA is happening).  
  

>From the below stack trace(with dump_stack in vfio_pci_release), 
[  201.564273] [] vfio_pci_release+0x80/0x458
[  201.564276] [] vfio_device_fops_release+0x2c/0x50
[  201.564279] [] __fput+0x9c/0x218
[  201.564283] [] fput+0x20/0x30
[  201.564286] [] task_work_run+0xa0/0xc8
[  201.564289] [] do_exit+0x2bc/0x9c8
[  201.564293] [] do_group_exit+0x3c/0xa8
[  201.564296] [] get_signal+0x3e4/0x538
[  201.564299] [] do_signal+0x70/0x660
[  201.564302] [] do_notify_resume+0xe0/0x120

   

PCI device is disabled/reset from vfio_pci_release invoked as part of 
device fd release. The fd releases are in turn invoked from exit_files
and exit_task_work.

But exit_mm, gets called before exit_files/exit_task_work in do_exit.

   
Assuming all pages allocated/mmaped to a process gets freed in exit_mm, 

  
is there is a possibility that user pages configured for DMA can get freed 
to kernel before the vfio device is stopped/reset ? 

Thanks.

-- 
Linu cherian


Re: [PATCH 0/2] arm64 SMMUv3 PMU driver with IORT support

2017-12-18 Thread Linu Cherian
Hi Marc,

On Mon Dec 18, 2017 at 03:39:22PM +, Marc Zyngier wrote:
> Thanks for putting me in the loop Robin.
> 
> On 18/12/17 14:48, Robin Murphy wrote:
> > On 10/12/17 02:35, Linu Cherian wrote:
> >> Hi,
> >>
> >>
> >> On Fri Aug 04, 2017 at 03:59:12PM -0400, Neil Leeder wrote:
> >>> This adds a driver for the SMMUv3 PMU into the perf framework.
> >>> It includes an IORT update to support PM Counter Groups.
> >>>
> >>
> >> In one of Cavium's upcoming SOC, SMMU PMCG implementation is such a way
> >> that platform bus id (Device ID in ITS terminmology)is shared with that of 
> >> SMMU.
> >> This would be a matter of concern for software if the SMMU and SMMU PMCG 
> >> blocks
> >> are managed by two independent drivers.
> >>
> >> The problem arises when we want to alloc/free MSIs for these devices
> >> using the APIs, platform_msi_domain_alloc/free_irqs.
> >> Platform bus id being same for these two hardware blocks, they end up 
> >> sharing the same
> >> ITT(Interrupt Translation Table) in GIC ITS and hence alloc, free and 
> >> management
> >> of this shared ITT becomes a problem when they are managed by two 
> >> independent
> >> drivers.
> > 
> > What is the problem exactly? IIRC resizing a possibly-live ITT is a 
> > right pain in the bum to do - is it just that?
> 
> Understatement of the day! ;-) Yes, it is a massive headache, and will
> either result in a temporary loss of interrupts (at some point you have
> to unmap the ITT), or with spurious interrupts (you generate interrupts
> for all the MSIs you've blackholed when unmapping the ITT).

Just sharing a thought.

If the firmware, through device tree/ACPI 
can provide the following input to the ITS driver,
(For example, as part of msi-parent property in device tree)

1. flag indicating the ITT (Device ID) is being shared 
2. the maximum number of vectors that are required to be allocated for this ITT

resizing of ITT and the associated complexities could be avoided.   

 
> 
> > 
> >> We were looking into the option of keeping the SMMU PMCG nodes as sub 
> >> nodes under
> >> SMMUv3 node, so that SMMUv3 driver could probe and figure out the total 
> >> vectors
> >> required for SMMU PMCG devices and make a common 
> >> platform_msi_domain_alloc/free_irqs
> >> function call for all devices that share the platform bus id.
> > 
> > I'm not sure how scalable that approach would be, since it's not 
> > entirely obvious how to handle PMCGs associated with named components or 
> > root complexes (rather than directly with SMMU instances). We certainly 
> > don't want to end up spraying similar PMCG DevID logic around all manner 
> > of GPU/accelerator/etc. drivers in future (whilst PMCGs for device TLBs 
> > will be expected to have distinct IDs from their host devices, they 
> > could reasonably still overlap with other PMCGs/SMMUs).
> > 
> >> Would like to know your expert opinion on what would be the right approach
> >> to handle this case ?
> > 
> > My gut feeling says the way to deal with this properly is in the ITS 
> > code, but I appreciate that that's a lot easier said than done :/
> 
> I can revive the hack I once wrote for that (and that was hoping to
> forever forget), but that's pretty disgusting, and subject to the above
> caveat.
> 
> Thanks,
> 
>   M.
> -- 
> Jazz is not dead. It just smells funny...

-- 
Linu cherian


Re: [PATCH 0/2] arm64 SMMUv3 PMU driver with IORT support

2017-12-18 Thread Linu Cherian
Hi Marc,

On Mon Dec 18, 2017 at 03:39:22PM +, Marc Zyngier wrote:
> Thanks for putting me in the loop Robin.
> 
> On 18/12/17 14:48, Robin Murphy wrote:
> > On 10/12/17 02:35, Linu Cherian wrote:
> >> Hi,
> >>
> >>
> >> On Fri Aug 04, 2017 at 03:59:12PM -0400, Neil Leeder wrote:
> >>> This adds a driver for the SMMUv3 PMU into the perf framework.
> >>> It includes an IORT update to support PM Counter Groups.
> >>>
> >>
> >> In one of Cavium's upcoming SOC, SMMU PMCG implementation is such a way
> >> that platform bus id (Device ID in ITS terminmology)is shared with that of 
> >> SMMU.
> >> This would be a matter of concern for software if the SMMU and SMMU PMCG 
> >> blocks
> >> are managed by two independent drivers.
> >>
> >> The problem arises when we want to alloc/free MSIs for these devices
> >> using the APIs, platform_msi_domain_alloc/free_irqs.
> >> Platform bus id being same for these two hardware blocks, they end up 
> >> sharing the same
> >> ITT(Interrupt Translation Table) in GIC ITS and hence alloc, free and 
> >> management
> >> of this shared ITT becomes a problem when they are managed by two 
> >> independent
> >> drivers.
> > 
> > What is the problem exactly? IIRC resizing a possibly-live ITT is a 
> > right pain in the bum to do - is it just that?
> 
> Understatement of the day! ;-) Yes, it is a massive headache, and will
> either result in a temporary loss of interrupts (at some point you have
> to unmap the ITT), or with spurious interrupts (you generate interrupts
> for all the MSIs you've blackholed when unmapping the ITT).

Just sharing a thought.

If the firmware, through device tree/ACPI 
can provide the following input to the ITS driver,
(For example, as part of msi-parent property in device tree)

1. flag indicating the ITT (Device ID) is being shared 
2. the maximum number of vectors that are required to be allocated for this ITT

resizing of ITT and the associated complexities could be avoided.   

 
> 
> > 
> >> We were looking into the option of keeping the SMMU PMCG nodes as sub 
> >> nodes under
> >> SMMUv3 node, so that SMMUv3 driver could probe and figure out the total 
> >> vectors
> >> required for SMMU PMCG devices and make a common 
> >> platform_msi_domain_alloc/free_irqs
> >> function call for all devices that share the platform bus id.
> > 
> > I'm not sure how scalable that approach would be, since it's not 
> > entirely obvious how to handle PMCGs associated with named components or 
> > root complexes (rather than directly with SMMU instances). We certainly 
> > don't want to end up spraying similar PMCG DevID logic around all manner 
> > of GPU/accelerator/etc. drivers in future (whilst PMCGs for device TLBs 
> > will be expected to have distinct IDs from their host devices, they 
> > could reasonably still overlap with other PMCGs/SMMUs).
> > 
> >> Would like to know your expert opinion on what would be the right approach
> >> to handle this case ?
> > 
> > My gut feeling says the way to deal with this properly is in the ITS 
> > code, but I appreciate that that's a lot easier said than done :/
> 
> I can revive the hack I once wrote for that (and that was hoping to
> forever forget), but that's pretty disgusting, and subject to the above
> caveat.
> 
> Thanks,
> 
>   M.
> -- 
> Jazz is not dead. It just smells funny...

-- 
Linu cherian


Re: [PATCH 0/2] arm64 SMMUv3 PMU driver with IORT support

2017-12-18 Thread Linu Cherian
Hi Robin,

On Mon Dec 18, 2017 at 02:48:14PM +, Robin Murphy wrote:
> On 10/12/17 02:35, Linu Cherian wrote:
> >Hi,
> >
> >
> >On Fri Aug 04, 2017 at 03:59:12PM -0400, Neil Leeder wrote:
> >>This adds a driver for the SMMUv3 PMU into the perf framework.
> >>It includes an IORT update to support PM Counter Groups.
> >>
> >
> >In one of Cavium's upcoming SOC, SMMU PMCG implementation is such a way
> >that platform bus id (Device ID in ITS terminmology)is shared with that of 
> >SMMU.
> >This would be a matter of concern for software if the SMMU and SMMU PMCG 
> >blocks
> >are managed by two independent drivers.
> >
> >The problem arises when we want to alloc/free MSIs for these devices
> >using the APIs, platform_msi_domain_alloc/free_irqs.
> >Platform bus id being same for these two hardware blocks, they end up 
> >sharing the same
> >ITT(Interrupt Translation Table) in GIC ITS and hence alloc, free and 
> >management
> >of this shared ITT becomes a problem when they are managed by two independent
> >drivers.
> 
> What is the problem exactly? IIRC resizing a possibly-live ITT is a
> right pain in the bum to do - is it just that?

Yes exactly. Resizing ITT was the problem in sharing.

 
> >We were looking into the option of keeping the SMMU PMCG nodes as sub nodes 
> >under
> >SMMUv3 node, so that SMMUv3 driver could probe and figure out the total 
> >vectors
> >required for SMMU PMCG devices and make a common 
> >platform_msi_domain_alloc/free_irqs
> >function call for all devices that share the platform bus id.
> 
> I'm not sure how scalable that approach would be, since it's not
> entirely obvious how to handle PMCGs associated with named
> components or root complexes (rather than directly with SMMU
> instances). We certainly don't want to end up spraying similar PMCG
> DevID logic around all manner of GPU/accelerator/etc. drivers in
> future (whilst PMCGs for device TLBs will be expected to have
> distinct IDs from their host devices, they could reasonably still
> overlap with other PMCGs/SMMUs).
>

OK.
 
While trying the above approach, we also felt that the code will become 
lot messier than actually thought.

> >Would like to know your expert opinion on what would be the right approach
> >to handle this case ?
> 
> My gut feeling says the way to deal with this properly is in the ITS
> code, but I appreciate that that's a lot easier said than done :/
>

Yes Correct.
 
> Robin.

-- 
Linu cherian


Re: [PATCH 0/2] arm64 SMMUv3 PMU driver with IORT support

2017-12-18 Thread Linu Cherian
Hi Robin,

On Mon Dec 18, 2017 at 02:48:14PM +, Robin Murphy wrote:
> On 10/12/17 02:35, Linu Cherian wrote:
> >Hi,
> >
> >
> >On Fri Aug 04, 2017 at 03:59:12PM -0400, Neil Leeder wrote:
> >>This adds a driver for the SMMUv3 PMU into the perf framework.
> >>It includes an IORT update to support PM Counter Groups.
> >>
> >
> >In one of Cavium's upcoming SOC, SMMU PMCG implementation is such a way
> >that platform bus id (Device ID in ITS terminmology)is shared with that of 
> >SMMU.
> >This would be a matter of concern for software if the SMMU and SMMU PMCG 
> >blocks
> >are managed by two independent drivers.
> >
> >The problem arises when we want to alloc/free MSIs for these devices
> >using the APIs, platform_msi_domain_alloc/free_irqs.
> >Platform bus id being same for these two hardware blocks, they end up 
> >sharing the same
> >ITT(Interrupt Translation Table) in GIC ITS and hence alloc, free and 
> >management
> >of this shared ITT becomes a problem when they are managed by two independent
> >drivers.
> 
> What is the problem exactly? IIRC resizing a possibly-live ITT is a
> right pain in the bum to do - is it just that?

Yes exactly. Resizing ITT was the problem in sharing.

 
> >We were looking into the option of keeping the SMMU PMCG nodes as sub nodes 
> >under
> >SMMUv3 node, so that SMMUv3 driver could probe and figure out the total 
> >vectors
> >required for SMMU PMCG devices and make a common 
> >platform_msi_domain_alloc/free_irqs
> >function call for all devices that share the platform bus id.
> 
> I'm not sure how scalable that approach would be, since it's not
> entirely obvious how to handle PMCGs associated with named
> components or root complexes (rather than directly with SMMU
> instances). We certainly don't want to end up spraying similar PMCG
> DevID logic around all manner of GPU/accelerator/etc. drivers in
> future (whilst PMCGs for device TLBs will be expected to have
> distinct IDs from their host devices, they could reasonably still
> overlap with other PMCGs/SMMUs).
>

OK.
 
While trying the above approach, we also felt that the code will become 
lot messier than actually thought.

> >Would like to know your expert opinion on what would be the right approach
> >to handle this case ?
> 
> My gut feeling says the way to deal with this properly is in the ITS
> code, but I appreciate that that's a lot easier said than done :/
>

Yes Correct.
 
> Robin.

-- 
Linu cherian


Re: [PATCH 0/2] arm64 SMMUv3 PMU driver with IORT support

2017-12-09 Thread Linu Cherian
Hi,


On Fri Aug 04, 2017 at 03:59:12PM -0400, Neil Leeder wrote:
> This adds a driver for the SMMUv3 PMU into the perf framework.
> It includes an IORT update to support PM Counter Groups.
>

In one of Cavium's upcoming SOC, SMMU PMCG implementation is such a way
that platform bus id (Device ID in ITS terminmology)is shared with that of SMMU.
This would be a matter of concern for software if the SMMU and SMMU PMCG blocks
are managed by two independent drivers.

The problem arises when we want to alloc/free MSIs for these devices
using the APIs, platform_msi_domain_alloc/free_irqs. 
Platform bus id being same for these two hardware blocks, they end up sharing 
the same
ITT(Interrupt Translation Table) in GIC ITS and hence alloc, free and 
management 
of this shared ITT becomes a problem when they are managed by two independent
drivers.

We were looking into the option of keeping the SMMU PMCG nodes as sub nodes 
under
SMMUv3 node, so that SMMUv3 driver could probe and figure out the total vectors
required for SMMU PMCG devices and make a common 
platform_msi_domain_alloc/free_irqs
function call for all devices that share the platform bus id.

Would like to know your expert opinion on what would be the right approach 
to handle this case ?

Thanks.


 
> IORT has no mechanism for determining device names so PMUs
> are named based on their physical address. 
> 
> Tested on Qualcomm QDF2400. perf_fuzzer ran for 4+ hours
> with no failures.
> 
> Neil Leeder (2):
>   acpi: arm64: add iort support for PMCG
>   perf: add arm64 smmuv3 pmu driver
> 
>  drivers/acpi/arm64/iort.c |  54 +++
>  drivers/perf/Kconfig  |   9 +
>  drivers/perf/Makefile |   1 +
>  drivers/perf/arm_smmuv3_pmu.c | 823 
> ++
>  include/acpi/actbl2.h |   9 +-
>  5 files changed, 895 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/perf/arm_smmuv3_pmu.c
> 
> -- 
> Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm 
> Technologies Inc.
> Qualcomm Technologies, Inc. is a member of the Code Aurora Forum,
> a Linux Foundation Collaborative Project.
> 
> 
> ___
> linux-arm-kernel mailing list
> linux-arm-ker...@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

-- 
Linu cherian


Re: [PATCH 0/2] arm64 SMMUv3 PMU driver with IORT support

2017-12-09 Thread Linu Cherian
Hi,


On Fri Aug 04, 2017 at 03:59:12PM -0400, Neil Leeder wrote:
> This adds a driver for the SMMUv3 PMU into the perf framework.
> It includes an IORT update to support PM Counter Groups.
>

In one of Cavium's upcoming SOC, SMMU PMCG implementation is such a way
that platform bus id (Device ID in ITS terminmology)is shared with that of SMMU.
This would be a matter of concern for software if the SMMU and SMMU PMCG blocks
are managed by two independent drivers.

The problem arises when we want to alloc/free MSIs for these devices
using the APIs, platform_msi_domain_alloc/free_irqs. 
Platform bus id being same for these two hardware blocks, they end up sharing 
the same
ITT(Interrupt Translation Table) in GIC ITS and hence alloc, free and 
management 
of this shared ITT becomes a problem when they are managed by two independent
drivers.

We were looking into the option of keeping the SMMU PMCG nodes as sub nodes 
under
SMMUv3 node, so that SMMUv3 driver could probe and figure out the total vectors
required for SMMU PMCG devices and make a common 
platform_msi_domain_alloc/free_irqs
function call for all devices that share the platform bus id.

Would like to know your expert opinion on what would be the right approach 
to handle this case ?

Thanks.


 
> IORT has no mechanism for determining device names so PMUs
> are named based on their physical address. 
> 
> Tested on Qualcomm QDF2400. perf_fuzzer ran for 4+ hours
> with no failures.
> 
> Neil Leeder (2):
>   acpi: arm64: add iort support for PMCG
>   perf: add arm64 smmuv3 pmu driver
> 
>  drivers/acpi/arm64/iort.c |  54 +++
>  drivers/perf/Kconfig  |   9 +
>  drivers/perf/Makefile |   1 +
>  drivers/perf/arm_smmuv3_pmu.c | 823 
> ++
>  include/acpi/actbl2.h |   9 +-
>  5 files changed, 895 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/perf/arm_smmuv3_pmu.c
> 
> -- 
> Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm 
> Technologies Inc.
> Qualcomm Technologies, Inc. is a member of the Code Aurora Forum,
> a Linux Foundation Collaborative Project.
> 
> 
> ___
> linux-arm-kernel mailing list
> linux-arm-ker...@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

-- 
Linu cherian


Re: [PATCH 2/2] perf: add arm64 smmuv3 pmu driver

2017-12-04 Thread Linu Cherian
 +   .stop   = smmu_pmu_event_stop,
> > +   .read   = smmu_pmu_event_read,
> > +   .attr_groups= smmu_pmu_attr_grps,
> > +   };
> > +
> > +   mem_resource_0 = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> > +   mem_map_0 = devm_ioremap_resource(>dev, mem_resource_0);
> > +
> > +   if (IS_ERR(mem_map_0)) {
> > +   dev_err(>dev, "Can't map SMMU PMU @%pa\n",
> > +   _resource_0->start);
> > +   return PTR_ERR(mem_map_0);
> > +   }
> > +
> > +   smmu_pmu->reg_base = mem_map_0;
> > +   smmu_pmu->pmu.name =
> > +   devm_kasprintf(>dev, GFP_KERNEL, "smmu_0_%llx",
> > +  (mem_resource_0->start) >> SMMU_PA_SHIFT);
> > +
> > +   if (!smmu_pmu->pmu.name) {
> > +   dev_err(>dev, "Failed to create PMU name");
> > +   return -EINVAL;
> > +   }
> > +
> > +   ceid_64 = readq(smmu_pmu->reg_base + SMMU_PMCG_CEID0);
> > +   ceid[0] = ceid_64 & GENMASK(31, 0);
> 
> It took a second look to determine that that masking does nothing...
> 
> > +   ceid[1] = ceid_64 >> 32;
> > +   ceid_64 = readq(smmu_pmu->reg_base + SMMU_PMCG_CEID1);
> > +   ceid[2] = ceid_64 & GENMASK(31, 0);
> > +   ceid[3] = ceid_64 >> 32;
> > +   bitmap_from_u32array(smmu_pmu->supported_events, SMMU_MAX_EVENT_ID,
> > +ceid, SMMU_NUM_EVENTS_U32);
> 
> ...but then the whole lot might be cleaner and simpler with a u64[2]
> cast to u32* (or unioned to u32[4]) as necessary.
> 
> > +
> > +   /* Determine if page 1 is present */
> > +   if (readl(smmu_pmu->reg_base + SMMU_PMCG_CFGR) &
> > +   SMMU_PMCG_CFGR_RELOC_CTRS) {
> > +   mem_resource_1 = platform_get_resource(pdev, IORESOURCE_MEM, 1);
> > +   mem_map_1 = devm_ioremap_resource(>dev, mem_resource_1);
> > +
> > +   if (IS_ERR(mem_map_1)) {
> > +   dev_err(>dev, "Can't map SMMU PMU @%pa\n",
> > +   _resource_1->start);
> > +   return PTR_ERR(mem_map_1);
> > +   }
> > +   smmu_pmu->reloc_base = mem_map_1;
> > +   } else {
> > +   smmu_pmu->reloc_base = smmu_pmu->reg_base;
> > +   }
> > +
> > +   irq = platform_get_irq(pdev, 0);
> > +   if (irq < 0) {
> > +   dev_err(>dev,
> > +   "Failed to get valid irq for smmu @%pa\n",
> > +   _resource_0->start);
> > +   return irq;
> > +   }
> > +
> > +   err = devm_request_irq(>dev, irq, smmu_pmu_handle_irq,
> > +  IRQF_NOBALANCING | IRQF_SHARED | IRQF_NO_THREAD,
> > +  "smmu-pmu", smmu_pmu);
> > +   if (err) {
> > +   dev_err(>dev,
> > +   "Unable to request IRQ%d for SMMU PMU counters\n", irq);
> > +   return err;
> > +   }
> > +
> > +   smmu_pmu->irq = irq;
> > +
> > +   /* Pick one CPU to be the preferred one to use */
> > +   smmu_pmu->on_cpu = smp_processor_id();
> > +   WARN_ON(irq_set_affinity(smmu_pmu->irq, cpumask_of(smmu_pmu->on_cpu)));
> > +
> > +   smmu_pmu->num_counters = get_num_counters(smmu_pmu);
> > +   smmu_pmu->pdev = pdev;
> > +   smmu_pmu->counter_present_mask = GENMASK(smmu_pmu->num_counters - 1, 0);
> > +   reg_size = (readl(smmu_pmu->reg_base + SMMU_PMCG_CFGR) &
> > +   SMMU_PMCG_CFGR_SIZE_MASK) >> SMMU_PMCG_CFGR_SIZE_SHIFT;
> > +   smmu_pmu->reg_size_32 = (reg_size == SMMU_PMCG_CFGR_COUNTER_SIZE_32);
> > +   smmu_pmu->counter_mask = GENMASK_ULL(reg_size, 0);
> > +
> > +   smmu_pmu_reset(smmu_pmu);
> > +
> > +   err = cpuhp_state_add_instance_nocalls(cpuhp_state_num,
> > +  _pmu->node);
> > +   if (err) {
> > +   dev_err(>dev, "Error %d registering hotplug", err);
> > +   return err;
> > +   }
> > +
> > +   err = perf_pmu_register(_pmu->pmu, smmu_pmu->pmu.name, -1);
> > +   if (err) {
> > +   dev_err(>dev, "Error %d registering SMMU PMU\n", err);
> > +   goto out_unregister;
> > +   }
> > +
> > +   dev_info(>dev, "Registered SMMU PMU @ %pa using %d counters\n",
> > +_resource_0->start, smmu_pmu->num_counters);
> > +
> > +   return err;
> > +
> > +out_unregister:
> > +   cpuhp_state_remove_instance_nocalls(cpuhp_state_num, _pmu->node);
> > +   return err;
> > +}
> > +
> > +static int smmu_pmu_remove(struct platform_device *pdev)
> > +{
> > +   struct smmu_pmu *smmu_pmu = platform_get_drvdata(pdev);
> > +
> > +   perf_pmu_unregister(_pmu->pmu);
> > +   cpuhp_state_remove_instance_nocalls(cpuhp_state_num, _pmu->node);
> > +
> > +   return 0;
> > +}
> > +
> > +static void smmu_pmu_shutdown(struct platform_device *pdev)
> > +{
> > +   struct smmu_pmu *smmu_pmu = platform_get_drvdata(pdev);
> > +
> > +   smmu_pmu_disable(_pmu->pmu);
> > +}
> > +
> > +static struct platform_driver smmu_pmu_driver = {
> > +   .driver = {
> > +   .name = "arm-smmu-pmu",
> 
> Nit: "arm-smmu-v3-pmu" please, for consistency with the IOMMU driver
> naming. There is a SMMUv2 PMU driver in the works, too ;)
> 
> Robin.
> 
> > +   },
> > +   .probe = smmu_pmu_probe,
> > +   .remove = smmu_pmu_remove,
> > +   .shutdown = smmu_pmu_shutdown,
> > +};
> > +
> > +static int __init arm_smmu_pmu_init(void)
> > +{
> > +   cpuhp_state_num = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
> > + "perf/arm/smmupmu:online",
> > + NULL,
> > + smmu_pmu_offline_cpu);
> > +   if (cpuhp_state_num < 0)
> > +   return cpuhp_state_num;
> > +
> > +   return platform_driver_register(_pmu_driver);
> > +}
> > +module_init(arm_smmu_pmu_init);
> > +
> > +static void __exit arm_smmu_pmu_exit(void)
> > +{
> > +   platform_driver_unregister(_pmu_driver);
> > +}
> > +
> > +module_exit(arm_smmu_pmu_exit);
> > +MODULE_LICENSE("GPL v2");
> > 
> 
> 
> ___
> linux-arm-kernel mailing list
> linux-arm-ker...@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

-- 
Linu cherian


Re: [PATCH 2/2] perf: add arm64 smmuv3 pmu driver

2017-12-04 Thread Linu Cherian
= smmu_pmu_event_stop,
> > +   .read   = smmu_pmu_event_read,
> > +   .attr_groups= smmu_pmu_attr_grps,
> > +   };
> > +
> > +   mem_resource_0 = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> > +   mem_map_0 = devm_ioremap_resource(>dev, mem_resource_0);
> > +
> > +   if (IS_ERR(mem_map_0)) {
> > +   dev_err(>dev, "Can't map SMMU PMU @%pa\n",
> > +   _resource_0->start);
> > +   return PTR_ERR(mem_map_0);
> > +   }
> > +
> > +   smmu_pmu->reg_base = mem_map_0;
> > +   smmu_pmu->pmu.name =
> > +   devm_kasprintf(>dev, GFP_KERNEL, "smmu_0_%llx",
> > +  (mem_resource_0->start) >> SMMU_PA_SHIFT);
> > +
> > +   if (!smmu_pmu->pmu.name) {
> > +   dev_err(>dev, "Failed to create PMU name");
> > +   return -EINVAL;
> > +   }
> > +
> > +   ceid_64 = readq(smmu_pmu->reg_base + SMMU_PMCG_CEID0);
> > +   ceid[0] = ceid_64 & GENMASK(31, 0);
> 
> It took a second look to determine that that masking does nothing...
> 
> > +   ceid[1] = ceid_64 >> 32;
> > +   ceid_64 = readq(smmu_pmu->reg_base + SMMU_PMCG_CEID1);
> > +   ceid[2] = ceid_64 & GENMASK(31, 0);
> > +   ceid[3] = ceid_64 >> 32;
> > +   bitmap_from_u32array(smmu_pmu->supported_events, SMMU_MAX_EVENT_ID,
> > +ceid, SMMU_NUM_EVENTS_U32);
> 
> ...but then the whole lot might be cleaner and simpler with a u64[2]
> cast to u32* (or unioned to u32[4]) as necessary.
> 
> > +
> > +   /* Determine if page 1 is present */
> > +   if (readl(smmu_pmu->reg_base + SMMU_PMCG_CFGR) &
> > +   SMMU_PMCG_CFGR_RELOC_CTRS) {
> > +   mem_resource_1 = platform_get_resource(pdev, IORESOURCE_MEM, 1);
> > +   mem_map_1 = devm_ioremap_resource(>dev, mem_resource_1);
> > +
> > +   if (IS_ERR(mem_map_1)) {
> > +   dev_err(>dev, "Can't map SMMU PMU @%pa\n",
> > +   _resource_1->start);
> > +   return PTR_ERR(mem_map_1);
> > +   }
> > +   smmu_pmu->reloc_base = mem_map_1;
> > +   } else {
> > +   smmu_pmu->reloc_base = smmu_pmu->reg_base;
> > +   }
> > +
> > +   irq = platform_get_irq(pdev, 0);
> > +   if (irq < 0) {
> > +   dev_err(>dev,
> > +   "Failed to get valid irq for smmu @%pa\n",
> > +   _resource_0->start);
> > +   return irq;
> > +   }
> > +
> > +   err = devm_request_irq(>dev, irq, smmu_pmu_handle_irq,
> > +  IRQF_NOBALANCING | IRQF_SHARED | IRQF_NO_THREAD,
> > +  "smmu-pmu", smmu_pmu);
> > +   if (err) {
> > +   dev_err(>dev,
> > +   "Unable to request IRQ%d for SMMU PMU counters\n", irq);
> > +   return err;
> > +   }
> > +
> > +   smmu_pmu->irq = irq;
> > +
> > +   /* Pick one CPU to be the preferred one to use */
> > +   smmu_pmu->on_cpu = smp_processor_id();
> > +   WARN_ON(irq_set_affinity(smmu_pmu->irq, cpumask_of(smmu_pmu->on_cpu)));
> > +
> > +   smmu_pmu->num_counters = get_num_counters(smmu_pmu);
> > +   smmu_pmu->pdev = pdev;
> > +   smmu_pmu->counter_present_mask = GENMASK(smmu_pmu->num_counters - 1, 0);
> > +   reg_size = (readl(smmu_pmu->reg_base + SMMU_PMCG_CFGR) &
> > +   SMMU_PMCG_CFGR_SIZE_MASK) >> SMMU_PMCG_CFGR_SIZE_SHIFT;
> > +   smmu_pmu->reg_size_32 = (reg_size == SMMU_PMCG_CFGR_COUNTER_SIZE_32);
> > +   smmu_pmu->counter_mask = GENMASK_ULL(reg_size, 0);
> > +
> > +   smmu_pmu_reset(smmu_pmu);
> > +
> > +   err = cpuhp_state_add_instance_nocalls(cpuhp_state_num,
> > +  _pmu->node);
> > +   if (err) {
> > +   dev_err(>dev, "Error %d registering hotplug", err);
> > +   return err;
> > +   }
> > +
> > +   err = perf_pmu_register(_pmu->pmu, smmu_pmu->pmu.name, -1);
> > +   if (err) {
> > +   dev_err(>dev, "Error %d registering SMMU PMU\n", err);
> > +   goto out_unregister;
> > +   }
> > +
> > +   dev_info(>dev, "Registered SMMU PMU @ %pa using %d counters\n",
> > +_resource_0->start, smmu_pmu->num_counters);
> > +
> > +   return err;
> > +
> > +out_unregister:
> > +   cpuhp_state_remove_instance_nocalls(cpuhp_state_num, _pmu->node);
> > +   return err;
> > +}
> > +
> > +static int smmu_pmu_remove(struct platform_device *pdev)
> > +{
> > +   struct smmu_pmu *smmu_pmu = platform_get_drvdata(pdev);
> > +
> > +   perf_pmu_unregister(_pmu->pmu);
> > +   cpuhp_state_remove_instance_nocalls(cpuhp_state_num, _pmu->node);
> > +
> > +   return 0;
> > +}
> > +
> > +static void smmu_pmu_shutdown(struct platform_device *pdev)
> > +{
> > +   struct smmu_pmu *smmu_pmu = platform_get_drvdata(pdev);
> > +
> > +   smmu_pmu_disable(_pmu->pmu);
> > +}
> > +
> > +static struct platform_driver smmu_pmu_driver = {
> > +   .driver = {
> > +   .name = "arm-smmu-pmu",
> 
> Nit: "arm-smmu-v3-pmu" please, for consistency with the IOMMU driver
> naming. There is a SMMUv2 PMU driver in the works, too ;)
> 
> Robin.
> 
> > +   },
> > +   .probe = smmu_pmu_probe,
> > +   .remove = smmu_pmu_remove,
> > +   .shutdown = smmu_pmu_shutdown,
> > +};
> > +
> > +static int __init arm_smmu_pmu_init(void)
> > +{
> > +   cpuhp_state_num = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
> > + "perf/arm/smmupmu:online",
> > + NULL,
> > + smmu_pmu_offline_cpu);
> > +   if (cpuhp_state_num < 0)
> > +   return cpuhp_state_num;
> > +
> > +   return platform_driver_register(_pmu_driver);
> > +}
> > +module_init(arm_smmu_pmu_init);
> > +
> > +static void __exit arm_smmu_pmu_exit(void)
> > +{
> > +   platform_driver_unregister(_pmu_driver);
> > +}
> > +
> > +module_exit(arm_smmu_pmu_exit);
> > +MODULE_LICENSE("GPL v2");
> > 
> 
> 
> ___
> linux-arm-kernel mailing list
> linux-arm-ker...@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

-- 
Linu cherian


Re: [v4 3/4] iommu/arm-smmu-v3: Add workaround for Cavium ThunderX2 erratum #74

2017-05-09 Thread Linu Cherian
> On Tue May 09, 2017 at 02:02:58PM +0100, Robin Murphy wrote:
> > On 09/05/17 12:45, Geetha sowjanya wrote:
> > > From: Linu Cherian <linu.cher...@cavium.com>
> > > 
> > > Cavium ThunderX2 SMMU implementation doesn't support page 1 register space
> > > and PAGE0_REGS_ONLY option is enabled as an errata workaround.
> > > This option when turned on, replaces all page 1 offsets used for
> > > EVTQ_PROD/CONS, PRIQ_PROD/CONS register access with page 0 offsets.
> > > 
> > > SMMU resource size checks are now based on SMMU option PAGE0_REGS_ONLY,
> > > since resource size can be either 64k/128k.
> > > For this, arm_smmu_device_dt_probe/acpi_probe has been moved before
> > > platform_get_resource call, so that SMMU options are set beforehand.
> > > 
> > > Signed-off-by: Linu Cherian <linu.cher...@cavium.com>
> > > Signed-off-by: Geetha Sowjanya <geethasowjanya.ak...@cavium.com>
> > > ---
> > >  Documentation/arm64/silicon-errata.txt |  1 +
> > >  .../devicetree/bindings/iommu/arm,smmu-v3.txt  |  6 ++
> > >  drivers/iommu/arm-smmu-v3.c| 80 
> > > --
> > >  3 files changed, 66 insertions(+), 21 deletions(-)
> > > 
> > > diff --git a/Documentation/arm64/silicon-errata.txt 
> > > b/Documentation/arm64/silicon-errata.txt
> > > index 10f2ddd..4693a32 100644
> > > --- a/Documentation/arm64/silicon-errata.txt
> > > +++ b/Documentation/arm64/silicon-errata.txt
> > > @@ -62,6 +62,7 @@ stable kernels.
> > >  | Cavium | ThunderX GICv3  | #23154  | 
> > > CAVIUM_ERRATUM_23154|
> > >  | Cavium | ThunderX Core   | #27456  | 
> > > CAVIUM_ERRATUM_27456|
> > >  | Cavium | ThunderX SMMUv2 | #27704  | N/A   
> > >   |
> > > +| Cavium | ThunderX2 SMMUv3| #74 | N/A   
> > >   |
> > >  || | |   
> > >   |
> > >  | Freescale/NXP  | LS2080A/LS1043A | A-008585| 
> > > FSL_ERRATUM_A008585 |
> > >  || | |   
> > >   |
> > > diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt 
> > > b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
> > > index be57550..e6da62b 100644
> > > --- a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
> > > +++ b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
> > > @@ -49,6 +49,12 @@ the PCIe specification.
> > >  - hisilicon,broken-prefetch-cmd
> > >  : Avoid sending CMD_PREFETCH_* commands to the SMMU.
> > >  
> > > +- cavium-cn99xx,broken-page1-regspace
> > > +: Replaces all page 1 offsets used for 
> > > EVTQ_PROD/CONS,
> > > + PRIQ_PROD/CONS register access 
> > > with page 0 offsets.
> > > + Set for Caviun ThunderX2 
> > > silicon that doesn't support
> > > + SMMU page1 register space.
> > > +
> > >  ** Example
> > >  
> > >  smmu@2b40 {
> > > diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> > > index 380969a..1e986a0 100644
> > > --- a/drivers/iommu/arm-smmu-v3.c
> > > +++ b/drivers/iommu/arm-smmu-v3.c
> > > @@ -176,15 +176,15 @@
> > >  #define ARM_SMMU_CMDQ_CONS   0x9c
> > >  
> > >  #define ARM_SMMU_EVTQ_BASE   0xa0
> > > -#define ARM_SMMU_EVTQ_PROD   0x100a8
> > > -#define ARM_SMMU_EVTQ_CONS   0x100ac
> > > +#define ARM_SMMU_EVTQ_PROD(smmu) (page1_offset_adjust(0x100a8, smmu))
> > > +#define ARM_SMMU_EVTQ_CONS(smmu) (page1_offset_adjust(0x100ac, smmu))
> > 
> > Sorry, perhaps I should have communicated the rest of the idea more
> > explicitly - you now don't need to change these definitions...
> > 
> 
> Fine. 
> 
> 
> > >  #define ARM_SMMU_EVTQ_IRQ_CFG0   0xb0
> > >  #define ARM_SMMU_EVTQ_IRQ_CFG1   0xb8
> > >  #define ARM_SMMU_EVTQ_IRQ_CFG2   0xbc
> > >  
> > >  #define ARM_SMMU_PRIQ_BASE   0xc0
> > > -#define ARM_SMMU_PRIQ_PROD   0x100c8
> > > -#define ARM_S

Re: [v4 3/4] iommu/arm-smmu-v3: Add workaround for Cavium ThunderX2 erratum #74

2017-05-09 Thread Linu Cherian
> On Tue May 09, 2017 at 02:02:58PM +0100, Robin Murphy wrote:
> > On 09/05/17 12:45, Geetha sowjanya wrote:
> > > From: Linu Cherian 
> > > 
> > > Cavium ThunderX2 SMMU implementation doesn't support page 1 register space
> > > and PAGE0_REGS_ONLY option is enabled as an errata workaround.
> > > This option when turned on, replaces all page 1 offsets used for
> > > EVTQ_PROD/CONS, PRIQ_PROD/CONS register access with page 0 offsets.
> > > 
> > > SMMU resource size checks are now based on SMMU option PAGE0_REGS_ONLY,
> > > since resource size can be either 64k/128k.
> > > For this, arm_smmu_device_dt_probe/acpi_probe has been moved before
> > > platform_get_resource call, so that SMMU options are set beforehand.
> > > 
> > > Signed-off-by: Linu Cherian 
> > > Signed-off-by: Geetha Sowjanya 
> > > ---
> > >  Documentation/arm64/silicon-errata.txt |  1 +
> > >  .../devicetree/bindings/iommu/arm,smmu-v3.txt  |  6 ++
> > >  drivers/iommu/arm-smmu-v3.c| 80 
> > > --
> > >  3 files changed, 66 insertions(+), 21 deletions(-)
> > > 
> > > diff --git a/Documentation/arm64/silicon-errata.txt 
> > > b/Documentation/arm64/silicon-errata.txt
> > > index 10f2ddd..4693a32 100644
> > > --- a/Documentation/arm64/silicon-errata.txt
> > > +++ b/Documentation/arm64/silicon-errata.txt
> > > @@ -62,6 +62,7 @@ stable kernels.
> > >  | Cavium | ThunderX GICv3  | #23154  | 
> > > CAVIUM_ERRATUM_23154|
> > >  | Cavium | ThunderX Core   | #27456  | 
> > > CAVIUM_ERRATUM_27456|
> > >  | Cavium | ThunderX SMMUv2 | #27704  | N/A   
> > >   |
> > > +| Cavium | ThunderX2 SMMUv3| #74 | N/A   
> > >   |
> > >  || | |   
> > >   |
> > >  | Freescale/NXP  | LS2080A/LS1043A | A-008585| 
> > > FSL_ERRATUM_A008585 |
> > >  || | |   
> > >   |
> > > diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt 
> > > b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
> > > index be57550..e6da62b 100644
> > > --- a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
> > > +++ b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
> > > @@ -49,6 +49,12 @@ the PCIe specification.
> > >  - hisilicon,broken-prefetch-cmd
> > >  : Avoid sending CMD_PREFETCH_* commands to the SMMU.
> > >  
> > > +- cavium-cn99xx,broken-page1-regspace
> > > +: Replaces all page 1 offsets used for 
> > > EVTQ_PROD/CONS,
> > > + PRIQ_PROD/CONS register access 
> > > with page 0 offsets.
> > > + Set for Caviun ThunderX2 
> > > silicon that doesn't support
> > > + SMMU page1 register space.
> > > +
> > >  ** Example
> > >  
> > >  smmu@2b40 {
> > > diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> > > index 380969a..1e986a0 100644
> > > --- a/drivers/iommu/arm-smmu-v3.c
> > > +++ b/drivers/iommu/arm-smmu-v3.c
> > > @@ -176,15 +176,15 @@
> > >  #define ARM_SMMU_CMDQ_CONS   0x9c
> > >  
> > >  #define ARM_SMMU_EVTQ_BASE   0xa0
> > > -#define ARM_SMMU_EVTQ_PROD   0x100a8
> > > -#define ARM_SMMU_EVTQ_CONS   0x100ac
> > > +#define ARM_SMMU_EVTQ_PROD(smmu) (page1_offset_adjust(0x100a8, smmu))
> > > +#define ARM_SMMU_EVTQ_CONS(smmu) (page1_offset_adjust(0x100ac, smmu))
> > 
> > Sorry, perhaps I should have communicated the rest of the idea more
> > explicitly - you now don't need to change these definitions...
> > 
> 
> Fine. 
> 
> 
> > >  #define ARM_SMMU_EVTQ_IRQ_CFG0   0xb0
> > >  #define ARM_SMMU_EVTQ_IRQ_CFG1   0xb8
> > >  #define ARM_SMMU_EVTQ_IRQ_CFG2   0xbc
> > >  
> > >  #define ARM_SMMU_PRIQ_BASE   0xc0
> > > -#define ARM_SMMU_PRIQ_PROD   0x100c8
> > > -#define ARM_SMMU_PRIQ_CONS   0x100cc
> > > +#define ARM_SMMU_PRIQ_PROD(smmu) (page1_off

Re: [v4 3/4] iommu/arm-smmu-v3: Add workaround for Cavium ThunderX2 erratum #74

2017-05-09 Thread Linu Cherian
On Tue May 09, 2017 at 02:02:58PM +0100, Robin Murphy wrote:
> On 09/05/17 12:45, Geetha sowjanya wrote:
> > From: Linu Cherian <linu.cher...@cavium.com>
> > 
> > Cavium ThunderX2 SMMU implementation doesn't support page 1 register space
> > and PAGE0_REGS_ONLY option is enabled as an errata workaround.
> > This option when turned on, replaces all page 1 offsets used for
> > EVTQ_PROD/CONS, PRIQ_PROD/CONS register access with page 0 offsets.
> > 
> > SMMU resource size checks are now based on SMMU option PAGE0_REGS_ONLY,
> > since resource size can be either 64k/128k.
> > For this, arm_smmu_device_dt_probe/acpi_probe has been moved before
> > platform_get_resource call, so that SMMU options are set beforehand.
> > 
> > Signed-off-by: Linu Cherian <linu.cher...@cavium.com>
> > Signed-off-by: Geetha Sowjanya <geethasowjanya.ak...@cavium.com>
> > ---
> >  Documentation/arm64/silicon-errata.txt |  1 +
> >  .../devicetree/bindings/iommu/arm,smmu-v3.txt  |  6 ++
> >  drivers/iommu/arm-smmu-v3.c| 80 
> > --
> >  3 files changed, 66 insertions(+), 21 deletions(-)
> > 
> > diff --git a/Documentation/arm64/silicon-errata.txt 
> > b/Documentation/arm64/silicon-errata.txt
> > index 10f2ddd..4693a32 100644
> > --- a/Documentation/arm64/silicon-errata.txt
> > +++ b/Documentation/arm64/silicon-errata.txt
> > @@ -62,6 +62,7 @@ stable kernels.
> >  | Cavium | ThunderX GICv3  | #23154  | 
> > CAVIUM_ERRATUM_23154|
> >  | Cavium | ThunderX Core   | #27456  | 
> > CAVIUM_ERRATUM_27456|
> >  | Cavium | ThunderX SMMUv2 | #27704  | N/A 
> > |
> > +| Cavium | ThunderX2 SMMUv3| #74 | N/A 
> > |
> >  || | | 
> > |
> >  | Freescale/NXP  | LS2080A/LS1043A | A-008585| FSL_ERRATUM_A008585 
> > |
> >  || | | 
> > |
> > diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt 
> > b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
> > index be57550..e6da62b 100644
> > --- a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
> > +++ b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
> > @@ -49,6 +49,12 @@ the PCIe specification.
> >  - hisilicon,broken-prefetch-cmd
> >  : Avoid sending CMD_PREFETCH_* commands to the SMMU.
> >  
> > +- cavium-cn99xx,broken-page1-regspace
> > +: Replaces all page 1 offsets used for EVTQ_PROD/CONS,
> > +   PRIQ_PROD/CONS register access 
> > with page 0 offsets.
> > +   Set for Caviun ThunderX2 
> > silicon that doesn't support
> > +   SMMU page1 register space.
> > +
> >  ** Example
> >  
> >  smmu@2b40 {
> > diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> > index 380969a..1e986a0 100644
> > --- a/drivers/iommu/arm-smmu-v3.c
> > +++ b/drivers/iommu/arm-smmu-v3.c
> > @@ -176,15 +176,15 @@
> >  #define ARM_SMMU_CMDQ_CONS 0x9c
> >  
> >  #define ARM_SMMU_EVTQ_BASE 0xa0
> > -#define ARM_SMMU_EVTQ_PROD 0x100a8
> > -#define ARM_SMMU_EVTQ_CONS 0x100ac
> > +#define ARM_SMMU_EVTQ_PROD(smmu)   (page1_offset_adjust(0x100a8, smmu))
> > +#define ARM_SMMU_EVTQ_CONS(smmu)   (page1_offset_adjust(0x100ac, smmu))
> 
> Sorry, perhaps I should have communicated the rest of the idea more
> explicitly - you now don't need to change these definitions...
> 

Fine. 


> >  #define ARM_SMMU_EVTQ_IRQ_CFG0 0xb0
> >  #define ARM_SMMU_EVTQ_IRQ_CFG1 0xb8
> >  #define ARM_SMMU_EVTQ_IRQ_CFG2 0xbc
> >  
> >  #define ARM_SMMU_PRIQ_BASE 0xc0
> > -#define ARM_SMMU_PRIQ_PROD 0x100c8
> > -#define ARM_SMMU_PRIQ_CONS 0x100cc
> > +#define ARM_SMMU_PRIQ_PROD(smmu)   (page1_offset_adjust(0x100c8, smmu))
> > +#define ARM_SMMU_PRIQ_CONS(smmu)   (page1_offset_adjust(0x100cc, smmu))
> >  #define ARM_SMMU_PRIQ_IRQ_CFG0 0xd0
> >  #define ARM_SMMU_PRIQ_IRQ_CFG1 0xd8
> >  #define ARM_SMMU_PRIQ_IRQ_CFG2 0xdc
> > @@ -412,6 +412,9 @@
> >  #define MSI_IOVA_BASE  0

Re: [v4 3/4] iommu/arm-smmu-v3: Add workaround for Cavium ThunderX2 erratum #74

2017-05-09 Thread Linu Cherian
On Tue May 09, 2017 at 02:02:58PM +0100, Robin Murphy wrote:
> On 09/05/17 12:45, Geetha sowjanya wrote:
> > From: Linu Cherian 
> > 
> > Cavium ThunderX2 SMMU implementation doesn't support page 1 register space
> > and PAGE0_REGS_ONLY option is enabled as an errata workaround.
> > This option when turned on, replaces all page 1 offsets used for
> > EVTQ_PROD/CONS, PRIQ_PROD/CONS register access with page 0 offsets.
> > 
> > SMMU resource size checks are now based on SMMU option PAGE0_REGS_ONLY,
> > since resource size can be either 64k/128k.
> > For this, arm_smmu_device_dt_probe/acpi_probe has been moved before
> > platform_get_resource call, so that SMMU options are set beforehand.
> > 
> > Signed-off-by: Linu Cherian 
> > Signed-off-by: Geetha Sowjanya 
> > ---
> >  Documentation/arm64/silicon-errata.txt |  1 +
> >  .../devicetree/bindings/iommu/arm,smmu-v3.txt  |  6 ++
> >  drivers/iommu/arm-smmu-v3.c| 80 
> > --
> >  3 files changed, 66 insertions(+), 21 deletions(-)
> > 
> > diff --git a/Documentation/arm64/silicon-errata.txt 
> > b/Documentation/arm64/silicon-errata.txt
> > index 10f2ddd..4693a32 100644
> > --- a/Documentation/arm64/silicon-errata.txt
> > +++ b/Documentation/arm64/silicon-errata.txt
> > @@ -62,6 +62,7 @@ stable kernels.
> >  | Cavium | ThunderX GICv3  | #23154  | 
> > CAVIUM_ERRATUM_23154|
> >  | Cavium | ThunderX Core   | #27456  | 
> > CAVIUM_ERRATUM_27456|
> >  | Cavium | ThunderX SMMUv2 | #27704  | N/A 
> > |
> > +| Cavium | ThunderX2 SMMUv3| #74 | N/A 
> > |
> >  || | | 
> > |
> >  | Freescale/NXP  | LS2080A/LS1043A | A-008585| FSL_ERRATUM_A008585 
> > |
> >  || | | 
> > |
> > diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt 
> > b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
> > index be57550..e6da62b 100644
> > --- a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
> > +++ b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
> > @@ -49,6 +49,12 @@ the PCIe specification.
> >  - hisilicon,broken-prefetch-cmd
> >  : Avoid sending CMD_PREFETCH_* commands to the SMMU.
> >  
> > +- cavium-cn99xx,broken-page1-regspace
> > +: Replaces all page 1 offsets used for EVTQ_PROD/CONS,
> > +   PRIQ_PROD/CONS register access 
> > with page 0 offsets.
> > +   Set for Caviun ThunderX2 
> > silicon that doesn't support
> > +   SMMU page1 register space.
> > +
> >  ** Example
> >  
> >  smmu@2b40 {
> > diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> > index 380969a..1e986a0 100644
> > --- a/drivers/iommu/arm-smmu-v3.c
> > +++ b/drivers/iommu/arm-smmu-v3.c
> > @@ -176,15 +176,15 @@
> >  #define ARM_SMMU_CMDQ_CONS 0x9c
> >  
> >  #define ARM_SMMU_EVTQ_BASE 0xa0
> > -#define ARM_SMMU_EVTQ_PROD 0x100a8
> > -#define ARM_SMMU_EVTQ_CONS 0x100ac
> > +#define ARM_SMMU_EVTQ_PROD(smmu)   (page1_offset_adjust(0x100a8, smmu))
> > +#define ARM_SMMU_EVTQ_CONS(smmu)   (page1_offset_adjust(0x100ac, smmu))
> 
> Sorry, perhaps I should have communicated the rest of the idea more
> explicitly - you now don't need to change these definitions...
> 

Fine. 


> >  #define ARM_SMMU_EVTQ_IRQ_CFG0 0xb0
> >  #define ARM_SMMU_EVTQ_IRQ_CFG1 0xb8
> >  #define ARM_SMMU_EVTQ_IRQ_CFG2 0xbc
> >  
> >  #define ARM_SMMU_PRIQ_BASE 0xc0
> > -#define ARM_SMMU_PRIQ_PROD 0x100c8
> > -#define ARM_SMMU_PRIQ_CONS 0x100cc
> > +#define ARM_SMMU_PRIQ_PROD(smmu)   (page1_offset_adjust(0x100c8, smmu))
> > +#define ARM_SMMU_PRIQ_CONS(smmu)   (page1_offset_adjust(0x100cc, smmu))
> >  #define ARM_SMMU_PRIQ_IRQ_CFG0 0xd0
> >  #define ARM_SMMU_PRIQ_IRQ_CFG1 0xd8
> >  #define ARM_SMMU_PRIQ_IRQ_CFG2 0xdc
> > @@ -412,6 +412,9 @@
> >  #define MSI_IOVA_BASE  0x800
> >  #define MSI_IOVA_LENGTH0x10
> >  
> > +#define ARM_SMMU_PAGE0_REGS_ONLY(s

Re: [PATCH v3 0/7] Cavium ThunderX2 SMMUv3 errata workarounds

2017-05-08 Thread Linu Cherian
On Sat May 06, 2017 at 12:22:50AM +0200, Robert Richter wrote:
> On 05.05.17 17:38:04, Geetha sowjanya wrote:
> > From: Linu Cherian <linu.cher...@cavium.com>
> > 
> > Cavium ThunderX2 SMMUv3 implementation has two Silicon Erratas.
> > 1. Errata ID #74
> >SMMU register alias Page 1 is not implemented
> > 2. Errata ID #126
> >SMMU doesnt support unique IRQ lines and also MSI for gerror, 
> >eventq and cmdq-sync
> > 
> > The following patchset does software workaround for these two erratas.
> > 
> > This series is based on patchset. 
> > https://www.spinics.net/lists/arm-kernel/msg578443.html
> > 
> > Changes from v1:
> >  Since the use of MIDR register is rejected and SMMU_IIDR is broken on this 
> >  silicon, as suggested by Will Deacon modified the patches to use ThunderX2 
> >  SMMUv3 IORT model number to enable errata workaround.
> > 
> > Changes from v2:
> >  Updated "Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt" document 
> > with 
> >  new SMMU option used to enable errata workaround.
> >  
> > Geetha Sowjanya (1):
> >   iommu/arm-smmu-v3: Add workaround for Cavium ThunderX2 erratum #126
> > 
> > Linu Cherian (6):
> >   iommu/arm-smmu-v3: Introduce smmu option PAGE0_REGS_ONLY for ThunderX2
> > errata#74.
> >   iommu/arm-smmu-v3: Do resource size checks based on SMMU option
> > PAGE0_REGS_ONLY
> >   ACPICA: IORT: Add Cavium ThunderX2 SMMUv3 model definition.
> >   iommu/arm-smmu-v3: For ACPI based device probing, set PAGE0_REGS_ONLY
> > option for ThunderX2 SMMUv3 implementations.
> >   ACPI/IORT: Fixup SMMUv3 resource size for Cavium ThunderX2 SMMUv3
> > model
> >   arm64: Documentation: Add Cavium ThunderX2 SMMUv3 erratas
> 
> This split into patches does not look reasonable to me. 1 patch only
> for each workaround should be sufficient.
> 

* Should we not atleast keep the changes in drivers/acpi/iort.c and
 include/acpi/actbl2.h seperate, since they are outside smmuv3 driver ? 

* Probably i can merge the below patches, 
  1. iommu/arm-smmu-v3: Introduce smmu option PAGE0_REGS_ONLY for ThunderX2 
errata#74.
  2. iommu/arm-smmu-v3: Do resource size checks based on SMMU option
 PAGE0_REGS_O

Is that fine ?

Thanks.
-- 
Linu cherian


Re: [PATCH v3 0/7] Cavium ThunderX2 SMMUv3 errata workarounds

2017-05-08 Thread Linu Cherian
On Sat May 06, 2017 at 12:22:50AM +0200, Robert Richter wrote:
> On 05.05.17 17:38:04, Geetha sowjanya wrote:
> > From: Linu Cherian 
> > 
> > Cavium ThunderX2 SMMUv3 implementation has two Silicon Erratas.
> > 1. Errata ID #74
> >SMMU register alias Page 1 is not implemented
> > 2. Errata ID #126
> >SMMU doesnt support unique IRQ lines and also MSI for gerror, 
> >eventq and cmdq-sync
> > 
> > The following patchset does software workaround for these two erratas.
> > 
> > This series is based on patchset. 
> > https://www.spinics.net/lists/arm-kernel/msg578443.html
> > 
> > Changes from v1:
> >  Since the use of MIDR register is rejected and SMMU_IIDR is broken on this 
> >  silicon, as suggested by Will Deacon modified the patches to use ThunderX2 
> >  SMMUv3 IORT model number to enable errata workaround.
> > 
> > Changes from v2:
> >  Updated "Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt" document 
> > with 
> >  new SMMU option used to enable errata workaround.
> >  
> > Geetha Sowjanya (1):
> >   iommu/arm-smmu-v3: Add workaround for Cavium ThunderX2 erratum #126
> > 
> > Linu Cherian (6):
> >   iommu/arm-smmu-v3: Introduce smmu option PAGE0_REGS_ONLY for ThunderX2
> > errata#74.
> >   iommu/arm-smmu-v3: Do resource size checks based on SMMU option
> > PAGE0_REGS_ONLY
> >   ACPICA: IORT: Add Cavium ThunderX2 SMMUv3 model definition.
> >   iommu/arm-smmu-v3: For ACPI based device probing, set PAGE0_REGS_ONLY
> > option for ThunderX2 SMMUv3 implementations.
> >   ACPI/IORT: Fixup SMMUv3 resource size for Cavium ThunderX2 SMMUv3
> > model
> >   arm64: Documentation: Add Cavium ThunderX2 SMMUv3 erratas
> 
> This split into patches does not look reasonable to me. 1 patch only
> for each workaround should be sufficient.
> 

* Should we not atleast keep the changes in drivers/acpi/iort.c and
 include/acpi/actbl2.h seperate, since they are outside smmuv3 driver ? 

* Probably i can merge the below patches, 
  1. iommu/arm-smmu-v3: Introduce smmu option PAGE0_REGS_ONLY for ThunderX2 
errata#74.
  2. iommu/arm-smmu-v3: Do resource size checks based on SMMU option
 PAGE0_REGS_O

Is that fine ?

Thanks.
-- 
Linu cherian


Re: [PATCH v3 2/7] iommu/arm-smmu-v3: Do resource size checks based on SMMU

2017-05-08 Thread Linu Cherian

On Mon May 08, 2017 at 12:09:32PM +0200, Robert Richter wrote:
> On 08.05.17 15:14:37, Linu Cherian wrote:
> > On Sat May 06, 2017 at 12:18:44AM +0200, Robert Richter wrote:
> > > On 05.05.17 17:38:06, Geetha sowjanya wrote:
> > > > From: Linu Cherian <linu.cher...@cavium.com>
> > > > 
> > > > With implementations supporting only page 0 register space,
> > > > resource size can be 64k as well and hence perform size checks
> > > > based on SMMU option PAGE0_REGS_ONLY.
> > > > 
> > > > For this, arm_smmu_device_dt_probe/acpi_probe has been moved before
> > > > platform_get_resource call, so that SMMU options are set beforehand.
> > > > 
> > > > Signed-off-by:  Linu Cherian <linu.cher...@cavium.com>
> > > > Signed-off-by:  Geetha Sowjanya <geethasowjanya.ak...@cavium.com>
> > > > ---
> > > >  drivers/iommu/arm-smmu-v3.c | 26 +-
> > > >  1 file changed, 17 insertions(+), 9 deletions(-)
> > > > 
> > > > diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> > > > index 107b4a6..f027676 100644
> > > > --- a/drivers/iommu/arm-smmu-v3.c
> > > > +++ b/drivers/iommu/arm-smmu-v3.c
> > > > @@ -2672,6 +2672,14 @@ static int arm_smmu_device_dt_probe(struct 
> > > > platform_device *pdev,
> > > > return ret;
> > > >  }
> > > >  
> > > > +static unsigned long arm_smmu_resource_size(struct arm_smmu_device 
> > > > *smmu)
> > > > +{
> > > > +   if (ARM_SMMU_PAGE0_REGS_ONLY(smmu))
> > > > +   return SZ_64K;
> > > > +   else
> > > > +   return SZ_128K;
> > > > +}
> > > > +
> > > 
> > > I think this can be dropped. See below.
> > > 
> > > >  static int arm_smmu_device_probe(struct platform_device *pdev)
> > > >  {
> > > > int irq, ret;
> > > > @@ -2688,9 +2696,17 @@ static int arm_smmu_device_probe(struct 
> > > > platform_device *pdev)
> > > > }
> > > > smmu->dev = dev;
> > > >  
> > > > +   if (dev->of_node) {
> > > > +   ret = arm_smmu_device_dt_probe(pdev, smmu);
> > > > +   } else {
> > > > +   ret = arm_smmu_device_acpi_probe(pdev, smmu);
> > > > +   if (ret == -ENODEV)
> > > > +   return ret;
> > > > +   }
> > > > +
> > > > /* Base address */
> > > > res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> > > > -   if (resource_size(res) + 1 < SZ_128K) {
> > > > +   if (resource_size(res) + 1 < arm_smmu_resource_size(smmu)) {
> > > > dev_err(dev, "MMIO region too small (%pr)\n", res);
> > > > return -EINVAL;
> > > > }
> > > 
> > > Why not just do the follwoing here:
> > > 
> > >   /* Base address */
> > >   res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> > >   if (resource_size(res) + 1 < arm_smmu_resource_size(smmu)) {
> > >   dev_err(dev, "MMIO region too small (%pr)\n", res);
> > >   return -EINVAL;
> > >   }
> > >   ioaddr = res->start;
> > > 
> > > + /*
> > > +  * Override the size, for Cavium ThunderX2 implementation
> > > +  * which doesn't support the page 1 SMMU register space.
> > > +  */
> > > + if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
> > > + res->end = res->size + SZ_64K -1;
> > > +
> > >   smmu->base = devm_ioremap_resource(dev, res);
> > >   if (IS_ERR(smmu->base))
> > >   return PTR_ERR(smmu->base);
> > 
> > 
> > This might not work, since platform_device_add is being called from
> > iort.c before the res->end gets fixed up here. 
> 
> It should. You added it with 128k and you get it back with
> platform_get_resource(), but before ioremap you shrink the size to
> 64k.
> 

The smmu devices are located at 64k offsets and not at 128k
offsets and hence this would be result in resource conflict during
platform_add_device ?

Code snippet from platform_add_device:

for (i = 0; i < pdev->num_resources; i++) {
struct resource *p, *r = >resource[i];

if (r->name == NULL)
r->name = dev_name(>dev);

p = r->parent;
if (!p) {
if (resource_type(r) == IORESOURCE_MEM)
p = _resource;
else if (resource_type(r) == IORESOURCE_IO)
p = _resource;
}

if (p && insert_resource(p, r)) {
dev_err(>dev, "failed to claim resource %d: 
%pR\n", i, r);
ret = -EBUSY;
goto failed;
}
}







-- 
Linu cherian


Re: [PATCH v3 2/7] iommu/arm-smmu-v3: Do resource size checks based on SMMU

2017-05-08 Thread Linu Cherian

On Mon May 08, 2017 at 12:09:32PM +0200, Robert Richter wrote:
> On 08.05.17 15:14:37, Linu Cherian wrote:
> > On Sat May 06, 2017 at 12:18:44AM +0200, Robert Richter wrote:
> > > On 05.05.17 17:38:06, Geetha sowjanya wrote:
> > > > From: Linu Cherian 
> > > > 
> > > > With implementations supporting only page 0 register space,
> > > > resource size can be 64k as well and hence perform size checks
> > > > based on SMMU option PAGE0_REGS_ONLY.
> > > > 
> > > > For this, arm_smmu_device_dt_probe/acpi_probe has been moved before
> > > > platform_get_resource call, so that SMMU options are set beforehand.
> > > > 
> > > > Signed-off-by:  Linu Cherian 
> > > > Signed-off-by:  Geetha Sowjanya 
> > > > ---
> > > >  drivers/iommu/arm-smmu-v3.c | 26 +-
> > > >  1 file changed, 17 insertions(+), 9 deletions(-)
> > > > 
> > > > diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> > > > index 107b4a6..f027676 100644
> > > > --- a/drivers/iommu/arm-smmu-v3.c
> > > > +++ b/drivers/iommu/arm-smmu-v3.c
> > > > @@ -2672,6 +2672,14 @@ static int arm_smmu_device_dt_probe(struct 
> > > > platform_device *pdev,
> > > > return ret;
> > > >  }
> > > >  
> > > > +static unsigned long arm_smmu_resource_size(struct arm_smmu_device 
> > > > *smmu)
> > > > +{
> > > > +   if (ARM_SMMU_PAGE0_REGS_ONLY(smmu))
> > > > +   return SZ_64K;
> > > > +   else
> > > > +   return SZ_128K;
> > > > +}
> > > > +
> > > 
> > > I think this can be dropped. See below.
> > > 
> > > >  static int arm_smmu_device_probe(struct platform_device *pdev)
> > > >  {
> > > > int irq, ret;
> > > > @@ -2688,9 +2696,17 @@ static int arm_smmu_device_probe(struct 
> > > > platform_device *pdev)
> > > > }
> > > > smmu->dev = dev;
> > > >  
> > > > +   if (dev->of_node) {
> > > > +   ret = arm_smmu_device_dt_probe(pdev, smmu);
> > > > +   } else {
> > > > +   ret = arm_smmu_device_acpi_probe(pdev, smmu);
> > > > +   if (ret == -ENODEV)
> > > > +   return ret;
> > > > +   }
> > > > +
> > > > /* Base address */
> > > > res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> > > > -   if (resource_size(res) + 1 < SZ_128K) {
> > > > +   if (resource_size(res) + 1 < arm_smmu_resource_size(smmu)) {
> > > > dev_err(dev, "MMIO region too small (%pr)\n", res);
> > > > return -EINVAL;
> > > > }
> > > 
> > > Why not just do the follwoing here:
> > > 
> > >   /* Base address */
> > >   res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> > >   if (resource_size(res) + 1 < arm_smmu_resource_size(smmu)) {
> > >   dev_err(dev, "MMIO region too small (%pr)\n", res);
> > >   return -EINVAL;
> > >   }
> > >   ioaddr = res->start;
> > > 
> > > + /*
> > > +  * Override the size, for Cavium ThunderX2 implementation
> > > +  * which doesn't support the page 1 SMMU register space.
> > > +  */
> > > + if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
> > > + res->end = res->size + SZ_64K -1;
> > > +
> > >   smmu->base = devm_ioremap_resource(dev, res);
> > >   if (IS_ERR(smmu->base))
> > >   return PTR_ERR(smmu->base);
> > 
> > 
> > This might not work, since platform_device_add is being called from
> > iort.c before the res->end gets fixed up here. 
> 
> It should. You added it with 128k and you get it back with
> platform_get_resource(), but before ioremap you shrink the size to
> 64k.
> 

The smmu devices are located at 64k offsets and not at 128k
offsets and hence this would be result in resource conflict during
platform_add_device ?

Code snippet from platform_add_device:

for (i = 0; i < pdev->num_resources; i++) {
struct resource *p, *r = >resource[i];

if (r->name == NULL)
r->name = dev_name(>dev);

p = r->parent;
if (!p) {
if (resource_type(r) == IORESOURCE_MEM)
p = _resource;
else if (resource_type(r) == IORESOURCE_IO)
p = _resource;
}

if (p && insert_resource(p, r)) {
dev_err(>dev, "failed to claim resource %d: 
%pR\n", i, r);
ret = -EBUSY;
goto failed;
}
}







-- 
Linu cherian


Re: [PATCH v3 2/7] iommu/arm-smmu-v3: Do resource size checks based on SMMU

2017-05-08 Thread Linu Cherian
On Sat May 06, 2017 at 12:18:44AM +0200, Robert Richter wrote:
> On 05.05.17 17:38:06, Geetha sowjanya wrote:
> > From: Linu Cherian <linu.cher...@cavium.com>
> > 
> > With implementations supporting only page 0 register space,
> > resource size can be 64k as well and hence perform size checks
> > based on SMMU option PAGE0_REGS_ONLY.
> > 
> > For this, arm_smmu_device_dt_probe/acpi_probe has been moved before
> > platform_get_resource call, so that SMMU options are set beforehand.
> > 
> > Signed-off-by:  Linu Cherian <linu.cher...@cavium.com>
> > Signed-off-by:  Geetha Sowjanya <geethasowjanya.ak...@cavium.com>
> > ---
> >  drivers/iommu/arm-smmu-v3.c | 26 +-
> >  1 file changed, 17 insertions(+), 9 deletions(-)
> > 
> > diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> > index 107b4a6..f027676 100644
> > --- a/drivers/iommu/arm-smmu-v3.c
> > +++ b/drivers/iommu/arm-smmu-v3.c
> > @@ -2672,6 +2672,14 @@ static int arm_smmu_device_dt_probe(struct 
> > platform_device *pdev,
> > return ret;
> >  }
> >  
> > +static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
> > +{
> > +   if (ARM_SMMU_PAGE0_REGS_ONLY(smmu))
> > +   return SZ_64K;
> > +   else
> > +   return SZ_128K;
> > +}
> > +
> 
> I think this can be dropped. See below.
> 
> >  static int arm_smmu_device_probe(struct platform_device *pdev)
> >  {
> > int irq, ret;
> > @@ -2688,9 +2696,17 @@ static int arm_smmu_device_probe(struct 
> > platform_device *pdev)
> > }
> > smmu->dev = dev;
> >  
> > +   if (dev->of_node) {
> > +   ret = arm_smmu_device_dt_probe(pdev, smmu);
> > +   } else {
> > +   ret = arm_smmu_device_acpi_probe(pdev, smmu);
> > +   if (ret == -ENODEV)
> > +   return ret;
> > +   }
> > +
> > /* Base address */
> > res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> > -   if (resource_size(res) + 1 < SZ_128K) {
> > +   if (resource_size(res) + 1 < arm_smmu_resource_size(smmu)) {
> > dev_err(dev, "MMIO region too small (%pr)\n", res);
> > return -EINVAL;
> > }
> 
> Why not just do the follwoing here:
> 
>   /* Base address */
>   res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
>   if (resource_size(res) + 1 < arm_smmu_resource_size(smmu)) {
>   dev_err(dev, "MMIO region too small (%pr)\n", res);
>   return -EINVAL;
>   }
>   ioaddr = res->start;
> 
> + /*
> +  * Override the size, for Cavium ThunderX2 implementation
> +  * which doesn't support the page 1 SMMU register space.
> +  */
> + if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
> + res->end = res->size + SZ_64K -1;
> +
>   smmu->base = devm_ioremap_resource(dev, res);
>   if (IS_ERR(smmu->base))
>   return PTR_ERR(smmu->base);


This might not work, since platform_device_add is being called from
iort.c before the res->end gets fixed up here. 


-- 
Linu cherian


Re: [PATCH v3 2/7] iommu/arm-smmu-v3: Do resource size checks based on SMMU

2017-05-08 Thread Linu Cherian
On Sat May 06, 2017 at 12:18:44AM +0200, Robert Richter wrote:
> On 05.05.17 17:38:06, Geetha sowjanya wrote:
> > From: Linu Cherian 
> > 
> > With implementations supporting only page 0 register space,
> > resource size can be 64k as well and hence perform size checks
> > based on SMMU option PAGE0_REGS_ONLY.
> > 
> > For this, arm_smmu_device_dt_probe/acpi_probe has been moved before
> > platform_get_resource call, so that SMMU options are set beforehand.
> > 
> > Signed-off-by:  Linu Cherian 
> > Signed-off-by:  Geetha Sowjanya 
> > ---
> >  drivers/iommu/arm-smmu-v3.c | 26 +-
> >  1 file changed, 17 insertions(+), 9 deletions(-)
> > 
> > diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> > index 107b4a6..f027676 100644
> > --- a/drivers/iommu/arm-smmu-v3.c
> > +++ b/drivers/iommu/arm-smmu-v3.c
> > @@ -2672,6 +2672,14 @@ static int arm_smmu_device_dt_probe(struct 
> > platform_device *pdev,
> > return ret;
> >  }
> >  
> > +static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
> > +{
> > +   if (ARM_SMMU_PAGE0_REGS_ONLY(smmu))
> > +   return SZ_64K;
> > +   else
> > +   return SZ_128K;
> > +}
> > +
> 
> I think this can be dropped. See below.
> 
> >  static int arm_smmu_device_probe(struct platform_device *pdev)
> >  {
> > int irq, ret;
> > @@ -2688,9 +2696,17 @@ static int arm_smmu_device_probe(struct 
> > platform_device *pdev)
> > }
> > smmu->dev = dev;
> >  
> > +   if (dev->of_node) {
> > +   ret = arm_smmu_device_dt_probe(pdev, smmu);
> > +   } else {
> > +   ret = arm_smmu_device_acpi_probe(pdev, smmu);
> > +   if (ret == -ENODEV)
> > +   return ret;
> > +   }
> > +
> > /* Base address */
> > res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> > -   if (resource_size(res) + 1 < SZ_128K) {
> > +   if (resource_size(res) + 1 < arm_smmu_resource_size(smmu)) {
> > dev_err(dev, "MMIO region too small (%pr)\n", res);
> > return -EINVAL;
> > }
> 
> Why not just do the follwoing here:
> 
>   /* Base address */
>   res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
>   if (resource_size(res) + 1 < arm_smmu_resource_size(smmu)) {
>   dev_err(dev, "MMIO region too small (%pr)\n", res);
>   return -EINVAL;
>   }
>   ioaddr = res->start;
> 
> + /*
> +  * Override the size, for Cavium ThunderX2 implementation
> +  * which doesn't support the page 1 SMMU register space.
> +  */
> + if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
> + res->end = res->size + SZ_64K -1;
> +
>   smmu->base = devm_ioremap_resource(dev, res);
>   if (IS_ERR(smmu->base))
>   return PTR_ERR(smmu->base);


This might not work, since platform_device_add is being called from
iort.c before the res->end gets fixed up here. 


-- 
Linu cherian


Re: [PATCH v3 1/7] iommu/arm-smmu-v3: Introduce SMMU option PAGE0_REGS_ONLY for ThunderX2 errata #74

2017-05-08 Thread Linu Cherian
On Sat May 06, 2017 at 01:03:28AM +0200, Robert Richter wrote:
> On 05.05.17 17:38:05, Geetha sowjanya wrote:
> > From: Linu Cherian <linu.cher...@cavium.com>
> > 
> > Cavium ThunderX2 SMMU implementation doesn't support page 1 register space
> > and PAGE0_REGS_ONLY option will be enabled as an errata workaround.
> > 
> > This option when turned on, replaces all page 1 offsets used for
> > EVTQ_PROD/CONS, PRIQ_PROD/CONS register access with page 0 offsets.
> > 
> > Signed-off-by: Linu Cherian <linu.cher...@cavium.com>
> > Signed-off-by: Geetha Sowjanya <geethasowjanya.ak...@cavium.com>
> > ---
> >  .../devicetree/bindings/iommu/arm,smmu-v3.txt  |  6 +++
> >  drivers/iommu/arm-smmu-v3.c| 44 
> > --
> >  2 files changed, 38 insertions(+), 12 deletions(-)
> 
> > @@ -1995,8 +2011,10 @@ static int arm_smmu_init_queues(struct 
> > arm_smmu_device *smmu)
> > if (!(smmu->features & ARM_SMMU_FEAT_PRI))
> > return 0;
> >  
> > -   return arm_smmu_init_one_queue(smmu, >priq.q, ARM_SMMU_PRIQ_PROD,
> > -  ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS);
> > +   return arm_smmu_init_one_queue(smmu, >priq.q,
> > +  ARM_SMMU_PRIQ_PROD(smmu),
> > +  ARM_SMMU_PRIQ_CONS(smmu),
> > +  PRIQ_ENT_DWORDS);
> 
> I would also suggest Robin's idea from the v1 review here. This works
> if we rework arm_smmu_init_one_queue() to pass addresses instead of
> offsets.
> 
> This would make these widespread offset calculations obsolete.
>

Have pasted here the relevant changes for doing fixups on smmu base instead
of offset to get feedback. 

This actually results in more lines of changes. If you think the below
approach is still better, will post a V4 of this series with this change.


+static inline unsigned long arm_smmu_page1_base(
+   struct arm_smmu_device *smmu)
+{
+   if (ARM_SMMU_PAGE0_REGS_ONLY(smmu))
+   return smmu->base;
+   else
+   return smmu->base + SZ_64K;
+}
+

@@ -1948,8 +1962,8 @@ static void arm_smmu_put_resv_regions(struct device *dev,
 /* Probing and initialisation functions */
 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
   struct arm_smmu_queue *q,
-  unsigned long prod_off,
-  unsigned long cons_off,
+  unsigned long prod_addr,
+  unsigned long cons_addr,
   size_t dwords)
 {
size_t qsz = ((1 << q->max_n_shift) * dwords) << 3;
@@ -1961,8 +1975,8 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device 
*smmu,
return -ENOMEM;
}
 
-   q->prod_reg = smmu->base + prod_off;
-   q->cons_reg = smmu->base + cons_off;
+   q->prod_reg = prod_addr;
+   q->cons_reg = cons_addr;
q->ent_dwords   = dwords;
 
q->q_base  = Q_BASE_RWA;
@@ -1977,17 +1991,25 @@ static int arm_smmu_init_one_queue(struct 
arm_smmu_device *smmu,
 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
 {
int ret;
+   unsigned long page1_base, page0_base;
+
+   page0_base = smmu->base;
+   page1_base = arm_smmu_page1_base(smmu);
 
/* cmdq */
spin_lock_init(>cmdq.lock);
-   ret = arm_smmu_init_one_queue(smmu, >cmdq.q, ARM_SMMU_CMDQ_PROD,
- ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS);
+   ret = arm_smmu_init_one_queue(smmu, >cmdq.q, 
+ page0_base + ARM_SMMU_CMDQ_PROD,
+ page0_base + ARM_SMMU_CMDQ_CONS, 
+ CMDQ_ENT_DWORDS);
if (ret)
return ret;
 
/* evtq */
-   ret = arm_smmu_init_one_queue(smmu, >evtq.q, ARM_SMMU_EVTQ_PROD,
- ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS);
+   ret = arm_smmu_init_one_queue(smmu, >evtq.q,
+ page1_base + ARM_SMMU_EVTQ_PROD,
+ page1_base + ARM_SMMU_EVTQ_CONS,
+ EVTQ_ENT_DWORDS);
if (ret)
return ret;
 
@@ -1995,8 +2017,10 @@ static int arm_smmu_init_queues(struct arm_smmu_device 
*smmu)
if (!(smmu->features & ARM_SMMU_FEAT_PRI))
return 0;
 
-   return arm_smmu_init_one_queue(smmu, >priq.q, ARM_SMMU_PRIQ_PROD,
-  ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS);
+   return arm_smmu_init_one_que

Re: [PATCH v3 1/7] iommu/arm-smmu-v3: Introduce SMMU option PAGE0_REGS_ONLY for ThunderX2 errata #74

2017-05-08 Thread Linu Cherian
On Sat May 06, 2017 at 01:03:28AM +0200, Robert Richter wrote:
> On 05.05.17 17:38:05, Geetha sowjanya wrote:
> > From: Linu Cherian 
> > 
> > Cavium ThunderX2 SMMU implementation doesn't support page 1 register space
> > and PAGE0_REGS_ONLY option will be enabled as an errata workaround.
> > 
> > This option when turned on, replaces all page 1 offsets used for
> > EVTQ_PROD/CONS, PRIQ_PROD/CONS register access with page 0 offsets.
> > 
> > Signed-off-by: Linu Cherian 
> > Signed-off-by: Geetha Sowjanya 
> > ---
> >  .../devicetree/bindings/iommu/arm,smmu-v3.txt  |  6 +++
> >  drivers/iommu/arm-smmu-v3.c| 44 
> > --
> >  2 files changed, 38 insertions(+), 12 deletions(-)
> 
> > @@ -1995,8 +2011,10 @@ static int arm_smmu_init_queues(struct 
> > arm_smmu_device *smmu)
> > if (!(smmu->features & ARM_SMMU_FEAT_PRI))
> > return 0;
> >  
> > -   return arm_smmu_init_one_queue(smmu, >priq.q, ARM_SMMU_PRIQ_PROD,
> > -  ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS);
> > +   return arm_smmu_init_one_queue(smmu, >priq.q,
> > +  ARM_SMMU_PRIQ_PROD(smmu),
> > +  ARM_SMMU_PRIQ_CONS(smmu),
> > +  PRIQ_ENT_DWORDS);
> 
> I would also suggest Robin's idea from the v1 review here. This works
> if we rework arm_smmu_init_one_queue() to pass addresses instead of
> offsets.
> 
> This would make these widespread offset calculations obsolete.
>

Have pasted here the relevant changes for doing fixups on smmu base instead
of offset to get feedback. 

This actually results in more lines of changes. If you think the below
approach is still better, will post a V4 of this series with this change.


+static inline unsigned long arm_smmu_page1_base(
+   struct arm_smmu_device *smmu)
+{
+   if (ARM_SMMU_PAGE0_REGS_ONLY(smmu))
+   return smmu->base;
+   else
+   return smmu->base + SZ_64K;
+}
+

@@ -1948,8 +1962,8 @@ static void arm_smmu_put_resv_regions(struct device *dev,
 /* Probing and initialisation functions */
 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
   struct arm_smmu_queue *q,
-  unsigned long prod_off,
-  unsigned long cons_off,
+  unsigned long prod_addr,
+  unsigned long cons_addr,
   size_t dwords)
 {
size_t qsz = ((1 << q->max_n_shift) * dwords) << 3;
@@ -1961,8 +1975,8 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device 
*smmu,
return -ENOMEM;
}
 
-   q->prod_reg = smmu->base + prod_off;
-   q->cons_reg = smmu->base + cons_off;
+   q->prod_reg = prod_addr;
+   q->cons_reg = cons_addr;
q->ent_dwords   = dwords;
 
q->q_base  = Q_BASE_RWA;
@@ -1977,17 +1991,25 @@ static int arm_smmu_init_one_queue(struct 
arm_smmu_device *smmu,
 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
 {
int ret;
+   unsigned long page1_base, page0_base;
+
+   page0_base = smmu->base;
+   page1_base = arm_smmu_page1_base(smmu);
 
/* cmdq */
spin_lock_init(>cmdq.lock);
-   ret = arm_smmu_init_one_queue(smmu, >cmdq.q, ARM_SMMU_CMDQ_PROD,
- ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS);
+   ret = arm_smmu_init_one_queue(smmu, >cmdq.q, 
+ page0_base + ARM_SMMU_CMDQ_PROD,
+ page0_base + ARM_SMMU_CMDQ_CONS, 
+ CMDQ_ENT_DWORDS);
if (ret)
return ret;
 
/* evtq */
-   ret = arm_smmu_init_one_queue(smmu, >evtq.q, ARM_SMMU_EVTQ_PROD,
- ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS);
+   ret = arm_smmu_init_one_queue(smmu, >evtq.q,
+ page1_base + ARM_SMMU_EVTQ_PROD,
+ page1_base + ARM_SMMU_EVTQ_CONS,
+ EVTQ_ENT_DWORDS);
if (ret)
return ret;
 
@@ -1995,8 +2017,10 @@ static int arm_smmu_init_queues(struct arm_smmu_device 
*smmu)
if (!(smmu->features & ARM_SMMU_FEAT_PRI))
return 0;
 
-   return arm_smmu_init_one_queue(smmu, >priq.q, ARM_SMMU_PRIQ_PROD,
-  ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS);
+   return arm_smmu_init_one_queue(smmu, >priq.q,
+  page1_base + ARM_SMMU_PRIQ_PROD,
+