Re: [PATCH v1 01/16] dt-bindings: iommu: mediatek: Increase max interrupt number

2022-07-05 Thread Tinghan Shen via iommu
On Tue, 2022-07-05 at 14:49 -0600, Rob Herring wrote:
> On Mon, Jul 04, 2022 at 06:00:13PM +0800, Tinghan Shen wrote:
> > mt8195 infra iommu has max 5 interrupts.
> > 
> > Signed-off-by: Tinghan Shen 
> > ---
> >  .../devicetree/bindings/iommu/mediatek,iommu.yaml| 12 +++-
> >  1 file changed, 11 insertions(+), 1 deletion(-)
> > 
> > diff --git a/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml
> > b/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml
> > index 2ae3bbad7f1a..27eb9f6aa3ce 100644
> > --- a/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml
> > +++ b/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml
> > @@ -91,7 +91,8 @@ properties:
> >  maxItems: 1
> >  
> >interrupts:
> > -maxItems: 1
> > +minItems: 1
> > +maxItems: 5
> >  
> >clocks:
> >  items:
> > @@ -175,9 +176,18 @@ allOf:
> >const: mediatek,mt8195-iommu-infra
> >  
> >  then:
> > +  properties:
> > +interrupts:
> > +  maxItems: 1
> > +
> >required:
> >  - mediatek,larbs
> >  
> > +else:
> > +  properties:
> > +interrupts:
> > +  maxItems: 5
> 
> 5 is already the max.
> 
> minItems: 5
> 

Ok, I'll update in the next version.

Thanks,
TingHan

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v1 16/16] arm64: dts: mt8195: Add display node for vdosys0

2022-07-05 Thread Tinghan Shen via iommu
On Mon, 2022-07-04 at 14:39 +0200, Krzysztof Kozlowski wrote:
> On 04/07/2022 12:00, Tinghan Shen wrote:
> > From: "Jason-JH.Lin" 
> > 
> > Add display node for vdosys0 of mt8195.
> > 
> > Signed-off-by: Jason-JH.Lin 
> > Signed-off-by: Tinghan Shen 
> > ---
> >  arch/arm64/boot/dts/mediatek/mt8195.dtsi | 109 +++
> >  1 file changed, 109 insertions(+)
> > 
> > diff --git a/arch/arm64/boot/dts/mediatek/mt8195.dtsi 
> > b/arch/arm64/boot/dts/mediatek/mt8195.dtsi
> > index 724c6ca837b6..faea8ef33e5a 100644
> > --- a/arch/arm64/boot/dts/mediatek/mt8195.dtsi
> > +++ b/arch/arm64/boot/dts/mediatek/mt8195.dtsi
> > @@ -1961,6 +1961,7 @@
> > vdosys0: syscon@1c01a000 {
> > compatible = "mediatek,mt8195-mmsys", "syscon";
> > reg = <0 0x1c01a000 0 0x1000>;
> > +   mboxes = < 0 CMDQ_THR_PRIO_4>;
> > #clock-cells = <1>;
> > };
> >  
> > @@ -1976,6 +1977,114 @@
> > power-domains = < MT8195_POWER_DOMAIN_VENC_CORE1>;
> > };
> >  
> > +   ovl0: ovl@1c00 {
> > +   compatible = "mediatek,mt8195-disp-ovl",
> > +"mediatek,mt8183-disp-ovl";
> > +   reg = <0 0x1c00 0 0x1000>;
> > +   interrupts = ;
> > +   power-domains = < MT8195_POWER_DOMAIN_VDOSYS0>;
> > +   clocks = < CLK_VDO0_DISP_OVL0>;
> > +   iommus = <_vdo M4U_PORT_L0_DISP_OVL0_RDMA0>;
> > +   mediatek,gce-client-reg =
> > +< SUBSYS_1c00 0x 0x1000>;
> > +   };
> > +
> > +   rdma0: rdma@1c002000 {
> > +   compatible = "mediatek,mt8195-disp-rdma";
> > +   reg = <0 0x1c002000 0 0x1000>;
> > +   interrupts = ;
> > +   power-domains = < MT8195_POWER_DOMAIN_VDOSYS0>;
> > +   clocks = < CLK_VDO0_DISP_RDMA0>;
> > +   iommus = <_vdo M4U_PORT_L0_DISP_RDMA0>;
> > +   mediatek,gce-client-reg =
> > +< SUBSYS_1c00 0x2000 0x1000>;
> > +   };
> > +
> > +   color0: color@1c003000 {
> > +   compatible = "mediatek,mt8195-disp-color",
> > +"mediatek,mt8173-disp-color";
> > +   reg = <0 0x1c003000 0 0x1000>;
> > +   interrupts = ;
> > +   power-domains = < MT8195_POWER_DOMAIN_VDOSYS0>;
> > +   clocks = < CLK_VDO0_DISP_COLOR0>;
> > +   mediatek,gce-client-reg =
> > +< SUBSYS_1c00 0x3000 0x1000>;
> > +   };
> > +
> > +   ccorr0: ccorr@1c004000 {
> > +   compatible = "mediatek,mt8195-disp-ccorr",
> > +"mediatek,mt8192-disp-ccorr";
> > +   reg = <0 0x1c004000 0 0x1000>;
> > +   interrupts = ;
> > +   power-domains = < MT8195_POWER_DOMAIN_VDOSYS0>;
> > +   clocks = < CLK_VDO0_DISP_CCORR0>;
> > +   mediatek,gce-client-reg =
> > +< SUBSYS_1c00 0x4000 0x1000>;
> > +   };
> > +
> > +   aal0: aal@1c005000 {
> > +   compatible = "mediatek,mt8195-disp-aal",
> > +"mediatek,mt8183-disp-aal";
> > +   reg = <0 0x1c005000 0 0x1000>;
> > +   interrupts = ;
> > +   power-domains = < MT8195_POWER_DOMAIN_VDOSYS0>;
> > +   clocks = < CLK_VDO0_DISP_AAL0>;
> > +   mediatek,gce-client-reg =
> > +< SUBSYS_1c00 0x5000 0x1000>;
> > +   };
> > +
> > +   gamma0: gamma@1c006000 {
> > +   compatible = "mediatek,mt8195-disp-gamma",
> > +"mediatek,mt8183-disp-gamma";
> > +   reg = <0 0x1c006000 0 0x1000>;
> > +   interrupts = ;
> > +   power-domains = < MT8195_POWER_DOMAIN_VDOSYS0>;
> > +   clocks = < CLK_VDO0_DISP_GAMMA0>;
> > +   mediatek,gce-client-reg =
> > +< SUBSYS_1c00 0x6000 0x1000>;
> > +   };
> > +
> > +   dither0: dither@1c007000 {
> > +   compatible = "mediatek,mt8195-disp-dither",
> > +"mediatek,mt8183-disp-dither";
> > +   reg = <0 0x1c007000 0 0x1000>;
> > +   interrupts = ;
> > +   power-domains = < MT8195_POWER_DOMAIN_VDOSYS0>;
> > +   clocks = < CLK_VDO0_DISP_DITHER0>;
> > +   mediatek,gce-client-reg =
> > +< SUBSYS_1c00 0x7000 0x1000>;
> > +   };
> > +
> > +   dsc0: dsc@1c009000 {
> > +   compatible = 

Re: [PATCH v1 02/16] dt-bindings: memory: mediatek: Update condition for mt8195 smi node

2022-07-05 Thread Tinghan Shen via iommu
On Mon, 2022-07-04 at 14:36 +0200, Krzysztof Kozlowski wrote:
> On 04/07/2022 12:00, Tinghan Shen wrote:
> > The max clock items for the dts node with compatible
> > 'mediatek,mt8195-smi-sub-common' should be 3.
> > 
> > However, the dtbs_check of such node will get following message,
> > arch/arm64/boot/dts/mediatek/mt8195-evb.dtb: smi@1401: clock-names: 
> > ['apb', 'smi', 'gals0']
> > is too long
> >  From schema: 
> > Documentation/devicetree/bindings/memory-controllers/mediatek,smi-
> > common.yaml
> > 
> > Remove the last 'else' checking to fix this error.
> 
> Missing fixes tag.
> 
> > 
> > Signed-off-by: Tinghan Shen 
> > ---
> >  .../memory-controllers/mediatek,smi-common.yaml| 10 +-
> >  1 file changed, 9 insertions(+), 1 deletion(-)
> > 
> > diff --git 
> > a/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-common.yaml
> > b/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-common.yaml
> > index a98b359bf909..e5f553e2e12a 100644
> > --- 
> > a/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-common.yaml
> > +++ 
> > b/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-common.yaml
> > @@ -143,7 +143,15 @@ allOf:
> >  - const: gals0
> >  - const: gals1
> >  
> > -else:  # for gen2 HW that don't have gals
> > +  - if:  # for gen2 HW that don't have gals
> > +  properties:
> > +compatible:
> > +  enum:
> > +- mediatek,mt2712-smi-common
> > +- mediatek,mt8167-smi-common
> > +- mediatek,mt8173-smi-common
> > +
> 
> Without looking at the code, it's impossible to understand what you are
> doing here. The commit msg says one, but you are doing something else.
> 
> Write commit msg explaining what you want to achieve and what you are doing.
> 
> 
> Best regards,
> Krzysztof

Ok, I'll update in next version.

Thanks,
TingHan

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v1 16/16] arm64: dts: mt8195: Add display node for vdosys0

2022-07-05 Thread Tinghan Shen via iommu
On Mon, 2022-07-04 at 12:44 +0200, AngeloGioacchino Del Regno wrote:
> Il 04/07/22 12:00, Tinghan Shen ha scritto:
> > From: "Jason-JH.Lin" 
> > 
> > Add display node for vdosys0 of mt8195.
> > 
> > Signed-off-by: Jason-JH.Lin 
> > Signed-off-by: Tinghan Shen 
> > ---
> >   arch/arm64/boot/dts/mediatek/mt8195.dtsi | 109 +++
> >   1 file changed, 109 insertions(+)
> > 
> > diff --git a/arch/arm64/boot/dts/mediatek/mt8195.dtsi 
> > b/arch/arm64/boot/dts/mediatek/mt8195.dtsi
> > index 724c6ca837b6..faea8ef33e5a 100644
> > --- a/arch/arm64/boot/dts/mediatek/mt8195.dtsi
> > +++ b/arch/arm64/boot/dts/mediatek/mt8195.dtsi
> > @@ -1961,6 +1961,7 @@
> > vdosys0: syscon@1c01a000 {
> > compatible = "mediatek,mt8195-mmsys", "syscon";
> > reg = <0 0x1c01a000 0 0x1000>;
> > +   mboxes = < 0 CMDQ_THR_PRIO_4>;
> > #clock-cells = <1>;
> > };
> >   
> > @@ -1976,6 +1977,114 @@
> > power-domains = < MT8195_POWER_DOMAIN_VENC_CORE1>;
> > };
> >   
> > +   ovl0: ovl@1c00 {
> > +   compatible = "mediatek,mt8195-disp-ovl",
> > +"mediatek,mt8183-disp-ovl";
> 
> This fits in one line, please fix, here and all of the other instances of 
> that.
> 
> > +   reg = <0 0x1c00 0 0x1000>;
> > +   interrupts = ;
> > +   power-domains = < MT8195_POWER_DOMAIN_VDOSYS0>;
> > +   clocks = < CLK_VDO0_DISP_OVL0>;
> > +   iommus = <_vdo M4U_PORT_L0_DISP_OVL0_RDMA0>;
> > +   mediatek,gce-client-reg =
> > +< SUBSYS_1c00 0x 0x1000>;
> 
> Same for gce-client-reg.
> 
> Regards,
> Angelo

Ok, I'll update in next version.

Thanks,
TingHan

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v1 04/16] arm64: dts: mt8195: Disable watchdog external reset signal

2022-07-05 Thread Tinghan Shen via iommu
On Mon, 2022-07-04 at 12:30 +0200, AngeloGioacchino Del Regno wrote:
> Il 04/07/22 12:00, Tinghan Shen ha scritto:
> > Disable external output reset signal in first round of watchdog reset
> > to reserve wdt reset reason for debugging watchdog issue.
> 
> If my understanding of the commit decription is right, then we can clarify
> that with something like: "[...] for debugging eventual watchdog issues".
> 
> Otherwise, if this implies that disable-extrst is needed to avoid losing
> the reset reason stored in the WDT, you could say something like:
> 
> "Disable external output reset signal in the first round of watchdog reset
> to avoid losing the reset reason stored in the watchdog registers"
> 
> After which:
> 
> Reviewed-by: AngeloGioacchino Del Regno 
> 
> 

Ok, I'll update it in next version.

Thanks,
TingHan

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v1 02/16] dt-bindings: memory: mediatek: Update condition for mt8195 smi node

2022-07-05 Thread Tinghan Shen via iommu
On Mon, 2022-07-04 at 12:25 +0200, AngeloGioacchino Del Regno wrote:
> Il 04/07/22 12:00, Tinghan Shen ha scritto:
> > The max clock items for the dts node with compatible
> > 'mediatek,mt8195-smi-sub-common' should be 3.
> > 
> > However, the dtbs_check of such node will get following message,
> > arch/arm64/boot/dts/mediatek/mt8195-evb.dtb: smi@1401: clock-names: 
> > ['apb', 'smi', 'gals0']
> > is too long
> >   From schema: 
> > Documentation/devicetree/bindings/memory-controllers/mediatek,smi-
> > common.yaml
> > 
> > Remove the last 'else' checking to fix this error.
> > 
> > Signed-off-by: Tinghan Shen 
> > ---
> >   .../memory-controllers/mediatek,smi-common.yaml| 10 +-
> >   1 file changed, 9 insertions(+), 1 deletion(-)
> > 
> > diff --git 
> > a/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-common.yaml
> > b/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-common.yaml
> > index a98b359bf909..e5f553e2e12a 100644
> > --- 
> > a/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-common.yaml
> > +++ 
> > b/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-common.yaml
> > @@ -143,7 +143,15 @@ allOf:
> >   - const: gals0
> >   - const: gals1
> >   
> > -else:  # for gen2 HW that don't have gals
> > +  - if:  # for gen2 HW that don't have gals
> > +  properties:
> > +compatible:
> > +  enum:
> > +- mediatek,mt2712-smi-common
> 
> MT6795 also doesn't have any GALS, please add it in here.

Ok, I'll update it in next version.

Thanks,
TingHan

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v4 11/11] iommu/vt-d: Convert global spinlock into per domain lock

2022-07-05 Thread Lu Baolu
Using a global device_domain_lock spinlock to protect per-domain device
tracking lists is an inefficient way, especially considering this lock
is also needed in the hot paths. This optimizes the locking mechanism
by converting the global lock to per domain lock.

On the other hand, as the device tracking lists are never accessed in
any interrupt context, there is no need to disable interrupts while
spinning. Replace irqsave variant with spinlock calls.

Signed-off-by: Lu Baolu 
Reviewed-by: Kevin Tian 
---
 drivers/iommu/intel/iommu.h |  1 +
 drivers/iommu/intel/iommu.c | 42 ++---
 2 files changed, 22 insertions(+), 21 deletions(-)

diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 198c6c822ef4..df64d3d9c49a 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -541,6 +541,7 @@ struct dmar_domain {
u8 force_snooping : 1;  /* Create IOPTEs with snoop control */
u8 set_pte_snp:1;
 
+   spinlock_t lock;/* Protect device tracking lists */
struct list_head devices;   /* all devices' list */
 
struct dma_pte  *pgd;   /* virtual address */
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 46991e313bf3..e007049eb9ff 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -310,7 +310,6 @@ static int iommu_skip_te_disable;
 #define IDENTMAP_GFX   2
 #define IDENTMAP_AZALIA4
 
-static DEFINE_SPINLOCK(device_domain_lock);
 const struct iommu_ops intel_iommu_ops;
 
 static bool translation_pre_enabled(struct intel_iommu *iommu)
@@ -535,7 +534,7 @@ static int domain_update_device_node(struct dmar_domain 
*domain)
struct device_domain_info *info;
int nid = NUMA_NO_NODE;
 
-   spin_lock(_domain_lock);
+   spin_lock(>lock);
list_for_each_entry(info, >devices, link) {
/*
 * There could possibly be multiple device numa nodes as devices
@@ -547,7 +546,7 @@ static int domain_update_device_node(struct dmar_domain 
*domain)
if (nid != NUMA_NO_NODE)
break;
}
-   spin_unlock(_domain_lock);
+   spin_unlock(>lock);
 
return nid;
 }
@@ -1378,15 +1377,15 @@ iommu_support_dev_iotlb(struct dmar_domain *domain, 
struct intel_iommu *iommu,
if (!iommu->qi)
return NULL;
 
-   spin_lock(_domain_lock);
+   spin_lock(>lock);
list_for_each_entry(info, >devices, link) {
if (info->iommu == iommu && info->bus == bus &&
info->devfn == devfn) {
-   spin_unlock(_domain_lock);
+   spin_unlock(>lock);
return info->ats_supported ? info : NULL;
}
}
-   spin_unlock(_domain_lock);
+   spin_unlock(>lock);
 
return NULL;
 }
@@ -1396,7 +1395,7 @@ static void domain_update_iotlb(struct dmar_domain 
*domain)
struct device_domain_info *info;
bool has_iotlb_device = false;
 
-   spin_lock(_domain_lock);
+   spin_lock(>lock);
list_for_each_entry(info, >devices, link) {
if (info->ats_enabled) {
has_iotlb_device = true;
@@ -1404,7 +1403,7 @@ static void domain_update_iotlb(struct dmar_domain 
*domain)
}
}
domain->has_iotlb_device = has_iotlb_device;
-   spin_unlock(_domain_lock);
+   spin_unlock(>lock);
 }
 
 static void iommu_enable_dev_iotlb(struct device_domain_info *info)
@@ -1500,10 +1499,10 @@ static void iommu_flush_dev_iotlb(struct dmar_domain 
*domain,
if (!domain->has_iotlb_device)
return;
 
-   spin_lock(_domain_lock);
+   spin_lock(>lock);
list_for_each_entry(info, >devices, link)
__iommu_flush_dev_iotlb(info, addr, mask);
-   spin_unlock(_domain_lock);
+   spin_unlock(>lock);
 }
 
 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
@@ -1763,6 +1762,7 @@ static struct dmar_domain *alloc_domain(unsigned int type)
domain->flags |= DOMAIN_FLAG_USE_FIRST_LEVEL;
domain->has_iotlb_device = false;
INIT_LIST_HEAD(>devices);
+   spin_lock_init(>lock);
 
return domain;
 }
@@ -2446,9 +2446,9 @@ static int domain_add_dev_info(struct dmar_domain 
*domain, struct device *dev)
if (ret)
return ret;
info->domain = domain;
-   spin_lock(_domain_lock);
+   spin_lock(>lock);
list_add(>link, >devices);
-   spin_unlock(_domain_lock);
+   spin_unlock(>lock);
 
/* PASID table is mandatory for a PCI device in scalable mode. */
if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) {
@@ -4123,6 +4123,7 @@ static void domain_context_clear(struct 
device_domain_info *info)
 static void dmar_remove_one_dev_info(struct device *dev)
 {
struct 

[PATCH v4 10/11] iommu/vt-d: Use device_domain_lock accurately

2022-07-05 Thread Lu Baolu
The device_domain_lock is used to protect the device tracking list of
a domain. Remove unnecessary spin_lock/unlock()'s and move the necessary
ones around the list access.

Signed-off-by: Lu Baolu 
Reviewed-by: Kevin Tian 
---
 drivers/iommu/intel/iommu.c | 57 +
 1 file changed, 19 insertions(+), 38 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 899ae63e1ddf..46991e313bf3 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -535,15 +535,8 @@ static int domain_update_device_node(struct dmar_domain 
*domain)
struct device_domain_info *info;
int nid = NUMA_NO_NODE;
 
-   assert_spin_locked(_domain_lock);
-
-   if (list_empty(>devices))
-   return NUMA_NO_NODE;
-
+   spin_lock(_domain_lock);
list_for_each_entry(info, >devices, link) {
-   if (!info->dev)
-   continue;
-
/*
 * There could possibly be multiple device numa nodes as devices
 * within the same domain may sit behind different IOMMUs. There
@@ -554,6 +547,7 @@ static int domain_update_device_node(struct dmar_domain 
*domain)
if (nid != NUMA_NO_NODE)
break;
}
+   spin_unlock(_domain_lock);
 
return nid;
 }
@@ -1376,23 +1370,23 @@ static void __iommu_flush_iotlb(struct intel_iommu 
*iommu, u16 did,
 }
 
 static struct device_domain_info *
-iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
-u8 bus, u8 devfn)
+iommu_support_dev_iotlb(struct dmar_domain *domain, struct intel_iommu *iommu,
+   u8 bus, u8 devfn)
 {
struct device_domain_info *info;
 
-   assert_spin_locked(_domain_lock);
-
if (!iommu->qi)
return NULL;
 
-   list_for_each_entry(info, >devices, link)
+   spin_lock(_domain_lock);
+   list_for_each_entry(info, >devices, link) {
if (info->iommu == iommu && info->bus == bus &&
info->devfn == devfn) {
-   if (info->ats_supported && info->dev)
-   return info;
-   break;
+   spin_unlock(_domain_lock);
+   return info->ats_supported ? info : NULL;
}
+   }
+   spin_unlock(_domain_lock);
 
return NULL;
 }
@@ -1402,23 +1396,21 @@ static void domain_update_iotlb(struct dmar_domain 
*domain)
struct device_domain_info *info;
bool has_iotlb_device = false;
 
-   assert_spin_locked(_domain_lock);
-
-   list_for_each_entry(info, >devices, link)
+   spin_lock(_domain_lock);
+   list_for_each_entry(info, >devices, link) {
if (info->ats_enabled) {
has_iotlb_device = true;
break;
}
-
+   }
domain->has_iotlb_device = has_iotlb_device;
+   spin_unlock(_domain_lock);
 }
 
 static void iommu_enable_dev_iotlb(struct device_domain_info *info)
 {
struct pci_dev *pdev;
 
-   assert_spin_locked(_domain_lock);
-
if (!info || !dev_is_pci(info->dev))
return;
 
@@ -1464,8 +1456,6 @@ static void iommu_disable_dev_iotlb(struct 
device_domain_info *info)
 {
struct pci_dev *pdev;
 
-   assert_spin_locked(_domain_lock);
-
if (!dev_is_pci(info->dev))
return;
 
@@ -1906,9 +1896,10 @@ static int domain_context_mapping_one(struct dmar_domain 
*domain,
  struct pasid_table *table,
  u8 bus, u8 devfn)
 {
+   struct device_domain_info *info =
+   iommu_support_dev_iotlb(domain, iommu, bus, devfn);
u16 did = domain->iommu_did[iommu->seq_id];
int translation = CONTEXT_TT_MULTI_LEVEL;
-   struct device_domain_info *info = NULL;
struct context_entry *context;
int ret;
 
@@ -1922,9 +1913,7 @@ static int domain_context_mapping_one(struct dmar_domain 
*domain,
 
BUG_ON(!domain->pgd);
 
-   spin_lock(_domain_lock);
spin_lock(>lock);
-
ret = -ENOMEM;
context = iommu_context_addr(iommu, bus, devfn, 1);
if (!context)
@@ -1975,7 +1964,6 @@ static int domain_context_mapping_one(struct dmar_domain 
*domain,
 * Setup the Device-TLB enable bit and Page request
 * Enable bit:
 */
-   info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
if (info && info->ats_supported)
context_set_sm_dte(context);
if (info && info->pri_supported)
@@ -1998,7 +1986,6 @@ static int domain_context_mapping_one(struct dmar_domain 
*domain,
goto out_unlock;
}
 
-   info = 

[PATCH v4 09/11] iommu/vt-d: Fold __dmar_remove_one_dev_info() into its caller

2022-07-05 Thread Lu Baolu
Fold __dmar_remove_one_dev_info() into dmar_remove_one_dev_info() which
is its only caller. Make the spin lock critical range only cover the
device list change code and remove some unnecessary checks.

Signed-off-by: Lu Baolu 
Reviewed-by: Kevin Tian 
---
 drivers/iommu/intel/iommu.c | 31 +++
 1 file changed, 7 insertions(+), 24 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index a94fb69e1f9a..899ae63e1ddf 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -295,7 +295,6 @@ static LIST_HEAD(dmar_satc_units);
 static int g_num_of_iommus;
 
 static void dmar_remove_one_dev_info(struct device *dev);
-static void __dmar_remove_one_dev_info(struct device_domain_info *info);
 
 int dmar_disabled = !IS_ENABLED(CONFIG_INTEL_IOMMU_DEFAULT_ON);
 int intel_iommu_sm = IS_ENABLED(CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON);
@@ -4137,20 +4136,12 @@ static void domain_context_clear(struct 
device_domain_info *info)
   _context_clear_one_cb, info);
 }
 
-static void __dmar_remove_one_dev_info(struct device_domain_info *info)
+static void dmar_remove_one_dev_info(struct device *dev)
 {
-   struct dmar_domain *domain;
-   struct intel_iommu *iommu;
-
-   assert_spin_locked(_domain_lock);
-
-   if (WARN_ON(!info))
-   return;
-
-   iommu = info->iommu;
-   domain = info->domain;
+   struct device_domain_info *info = dev_iommu_priv_get(dev);
+   struct intel_iommu *iommu = info->iommu;
 
-   if (info->dev && !dev_is_real_dma_subdevice(info->dev)) {
+   if (!dev_is_real_dma_subdevice(info->dev)) {
if (dev_is_pci(info->dev) && sm_supported(iommu))
intel_pasid_tear_down_entry(iommu, info->dev,
PASID_RID2PASID, false);
@@ -4160,19 +4151,11 @@ static void __dmar_remove_one_dev_info(struct 
device_domain_info *info)
intel_pasid_free_table(info->dev);
}
 
-   list_del(>link);
-   domain_detach_iommu(domain, iommu);
-}
-
-static void dmar_remove_one_dev_info(struct device *dev)
-{
-   struct device_domain_info *info;
-
spin_lock(_domain_lock);
-   info = dev_iommu_priv_get(dev);
-   if (info)
-   __dmar_remove_one_dev_info(info);
+   list_del(>link);
spin_unlock(_domain_lock);
+
+   domain_detach_iommu(info->domain, iommu);
 }
 
 static int md_domain_init(struct dmar_domain *domain, int guest_width)
-- 
2.25.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v4 07/11] iommu/vt-d: Acquiring lock in pasid manipulation helpers

2022-07-05 Thread Lu Baolu
The iommu->lock is used to protect the per-IOMMU pasid directory table
and pasid table. Move the spinlock acquisition/release into the helpers
to make the code self-contained.

Signed-off-by: Lu Baolu 
Reviewed-by: Kevin Tian 
---
 drivers/iommu/intel/iommu.c |   2 -
 drivers/iommu/intel/pasid.c | 103 +++-
 drivers/iommu/intel/svm.c   |   3 --
 3 files changed, 55 insertions(+), 53 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 7f03576e72d7..3d53de8c7634 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -2489,7 +2489,6 @@ static int domain_add_dev_info(struct dmar_domain 
*domain, struct device *dev)
}
 
/* Setup the PASID entry for requests without PASID: */
-   spin_lock(>lock);
if (hw_pass_through && domain_type_is_si(domain))
ret = intel_pasid_setup_pass_through(iommu, domain,
dev, PASID_RID2PASID);
@@ -2499,7 +2498,6 @@ static int domain_add_dev_info(struct dmar_domain 
*domain, struct device *dev)
else
ret = intel_pasid_setup_second_level(iommu, domain,
dev, PASID_RID2PASID);
-   spin_unlock(>lock);
if (ret) {
dev_err(dev, "Setup RID2PASID failed\n");
dmar_remove_one_dev_info(dev);
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index 43f090381ec7..7792a1b2ebc4 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -450,17 +450,17 @@ void intel_pasid_tear_down_entry(struct intel_iommu 
*iommu, struct device *dev,
struct pasid_entry *pte;
u16 did, pgtt;
 
+   spin_lock(>lock);
pte = intel_pasid_get_entry(dev, pasid);
-   if (WARN_ON(!pte))
-   return;
-
-   if (!pasid_pte_is_present(pte))
+   if (WARN_ON(!pte) || !pasid_pte_is_present(pte)) {
+   spin_unlock(>lock);
return;
+   }
 
did = pasid_get_domain_id(pte);
pgtt = pasid_pte_get_pgtt(pte);
-
intel_pasid_clear_entry(dev, pasid, fault_ignore);
+   spin_unlock(>lock);
 
if (!ecap_coherent(iommu->ecap))
clflush_cache_range(pte, sizeof(*pte));
@@ -496,22 +496,6 @@ static void pasid_flush_caches(struct intel_iommu *iommu,
}
 }
 
-static inline int pasid_enable_wpe(struct pasid_entry *pte)
-{
-#ifdef CONFIG_X86
-   unsigned long cr0 = read_cr0();
-
-   /* CR0.WP is normally set but just to be sure */
-   if (unlikely(!(cr0 & X86_CR0_WP))) {
-   pr_err_ratelimited("No CPU write protect!\n");
-   return -EINVAL;
-   }
-#endif
-   pasid_set_wpe(pte);
-
-   return 0;
-};
-
 /*
  * Set up the scalable mode pasid table entry for first only
  * translation type.
@@ -528,39 +512,52 @@ int intel_pasid_setup_first_level(struct intel_iommu 
*iommu,
return -EINVAL;
}
 
-   pte = intel_pasid_get_entry(dev, pasid);
-   if (WARN_ON(!pte))
+   if (flags & PASID_FLAG_SUPERVISOR_MODE) {
+#ifdef CONFIG_X86
+   unsigned long cr0 = read_cr0();
+
+   /* CR0.WP is normally set but just to be sure */
+   if (unlikely(!(cr0 & X86_CR0_WP))) {
+   pr_err("No CPU write protect!\n");
+   return -EINVAL;
+   }
+#endif
+   if (!ecap_srs(iommu->ecap)) {
+   pr_err("No supervisor request support on %s\n",
+  iommu->name);
+   return -EINVAL;
+   }
+   }
+
+   if ((flags & PASID_FLAG_FL5LP) && !cap_5lp_support(iommu->cap)) {
+   pr_err("No 5-level paging support for first-level on %s\n",
+  iommu->name);
return -EINVAL;
+   }
 
-   /* Caller must ensure PASID entry is not in use. */
-   if (pasid_pte_is_present(pte))
+   spin_lock(>lock);
+   pte = intel_pasid_get_entry(dev, pasid);
+   if (!pte) {
+   spin_unlock(>lock);
+   return -ENODEV;
+   }
+
+   if (pasid_pte_is_present(pte)) {
+   spin_unlock(>lock);
return -EBUSY;
+   }
 
pasid_clear_entry(pte);
 
/* Setup the first level page table pointer: */
pasid_set_flptr(pte, (u64)__pa(pgd));
if (flags & PASID_FLAG_SUPERVISOR_MODE) {
-   if (!ecap_srs(iommu->ecap)) {
-   pr_err("No supervisor request support on %s\n",
-  iommu->name);
-   return -EINVAL;
-   }
pasid_set_sre(pte);
-   if (pasid_enable_wpe(pte))
-   return -EINVAL;
-
+   pasid_set_wpe(pte);
}
 
-   if (flags & 

[PATCH v4 08/11] iommu/vt-d: Check device list of domain in domain free path

2022-07-05 Thread Lu Baolu
When the IOMMU domain is about to be freed, it should not be set on any
device. Instead of silently dealing with some bug cases, it's better to
trigger a warning to report and fix any potential bugs at the first time.

Signed-off-by: Lu Baolu 
Reviewed-by: Jason Gunthorpe 
Reviewed-by: Kevin Tian 
---
 drivers/iommu/intel/iommu.c | 18 +++---
 1 file changed, 3 insertions(+), 15 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 3d53de8c7634..a94fb69e1f9a 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -294,7 +294,6 @@ static LIST_HEAD(dmar_satc_units);
 /* bitmap for indexing intel_iommus */
 static int g_num_of_iommus;
 
-static void domain_remove_dev_info(struct dmar_domain *domain);
 static void dmar_remove_one_dev_info(struct device *dev);
 static void __dmar_remove_one_dev_info(struct device_domain_info *info);
 
@@ -1841,10 +1840,6 @@ static inline int guestwidth_to_adjustwidth(int gaw)
 
 static void domain_exit(struct dmar_domain *domain)
 {
-
-   /* Remove associated devices and clear attached or cached domains */
-   domain_remove_dev_info(domain);
-
if (domain->pgd) {
LIST_HEAD(freelist);
 
@@ -1852,6 +1847,9 @@ static void domain_exit(struct dmar_domain *domain)
put_pages_list();
}
 
+   if (WARN_ON(!list_empty(>devices)))
+   return;
+
kfree(domain);
 }
 
@@ -2333,16 +2331,6 @@ static void domain_context_clear_one(struct 
device_domain_info *info, u8 bus, u8
__iommu_flush_dev_iotlb(info, 0, MAX_AGAW_PFN_WIDTH);
 }
 
-static void domain_remove_dev_info(struct dmar_domain *domain)
-{
-   struct device_domain_info *info, *tmp;
-
-   spin_lock(_domain_lock);
-   list_for_each_entry_safe(info, tmp, >devices, link)
-   __dmar_remove_one_dev_info(info);
-   spin_unlock(_domain_lock);
-}
-
 static int domain_setup_first_level(struct intel_iommu *iommu,
struct dmar_domain *domain,
struct device *dev,
-- 
2.25.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v4 05/11] iommu/vt-d: Replace spin_lock_irqsave() with spin_lock()

2022-07-05 Thread Lu Baolu
The iommu->lock is used to protect changes in root/context/pasid tables
and domain ID allocation. There's no use case to change these resources
in any interrupt context. Therefore, it is unnecessary to disable the
interrupts when the spinlock is held. The same thing happens on the
device_domain_lock side, which protects the device domain attachment
information. This replaces spin_lock/unlock_irqsave/irqrestore() calls
with the normal spin_lock/unlock().

Signed-off-by: Lu Baolu 
Reviewed-by: Kevin Tian 
---
 drivers/iommu/intel/debugfs.c |  6 ++--
 drivers/iommu/intel/iommu.c   | 66 ++-
 drivers/iommu/intel/svm.c |  6 ++--
 3 files changed, 32 insertions(+), 46 deletions(-)

diff --git a/drivers/iommu/intel/debugfs.c b/drivers/iommu/intel/debugfs.c
index 6e1a3f88abc8..1f925285104e 100644
--- a/drivers/iommu/intel/debugfs.c
+++ b/drivers/iommu/intel/debugfs.c
@@ -263,10 +263,9 @@ static void ctx_tbl_walk(struct seq_file *m, struct 
intel_iommu *iommu, u16 bus)
 
 static void root_tbl_walk(struct seq_file *m, struct intel_iommu *iommu)
 {
-   unsigned long flags;
u16 bus;
 
-   spin_lock_irqsave(>lock, flags);
+   spin_lock(>lock);
seq_printf(m, "IOMMU %s: Root Table Address: 0x%llx\n", iommu->name,
   (u64)virt_to_phys(iommu->root_entry));
seq_puts(m, 
"B.D.F\tRoot_entry\t\t\t\tContext_entry\t\t\t\tPASID\tPASID_table_entry\n");
@@ -278,8 +277,7 @@ static void root_tbl_walk(struct seq_file *m, struct 
intel_iommu *iommu)
 */
for (bus = 0; bus < 256; bus++)
ctx_tbl_walk(m, iommu, bus);
-
-   spin_unlock_irqrestore(>lock, flags);
+   spin_unlock(>lock);
 }
 
 static int dmar_translation_struct_show(struct seq_file *m, void *unused)
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index ff49c9460ede..93f01082dce1 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -797,13 +797,12 @@ static int device_context_mapped(struct intel_iommu 
*iommu, u8 bus, u8 devfn)
 {
struct context_entry *context;
int ret = 0;
-   unsigned long flags;
 
-   spin_lock_irqsave(>lock, flags);
+   spin_lock(>lock);
context = iommu_context_addr(iommu, bus, devfn, 0);
if (context)
ret = context_present(context);
-   spin_unlock_irqrestore(>lock, flags);
+   spin_unlock(>lock);
return ret;
 }
 
@@ -1508,17 +1507,15 @@ static void __iommu_flush_dev_iotlb(struct 
device_domain_info *info,
 static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
  u64 addr, unsigned mask)
 {
-   unsigned long flags;
struct device_domain_info *info;
 
if (!domain->has_iotlb_device)
return;
 
-   spin_lock_irqsave(_domain_lock, flags);
+   spin_lock(_domain_lock);
list_for_each_entry(info, >devices, link)
__iommu_flush_dev_iotlb(info, addr, mask);
-
-   spin_unlock_irqrestore(_domain_lock, flags);
+   spin_unlock(_domain_lock);
 }
 
 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
@@ -1917,7 +1914,6 @@ static int domain_context_mapping_one(struct dmar_domain 
*domain,
int translation = CONTEXT_TT_MULTI_LEVEL;
struct device_domain_info *info = NULL;
struct context_entry *context;
-   unsigned long flags;
int ret;
 
WARN_ON(did == 0);
@@ -1930,7 +1926,7 @@ static int domain_context_mapping_one(struct dmar_domain 
*domain,
 
BUG_ON(!domain->pgd);
 
-   spin_lock_irqsave(_domain_lock, flags);
+   spin_lock(_domain_lock);
spin_lock(>lock);
 
ret = -ENOMEM;
@@ -2052,7 +2048,7 @@ static int domain_context_mapping_one(struct dmar_domain 
*domain,
 
 out_unlock:
spin_unlock(>lock);
-   spin_unlock_irqrestore(_domain_lock, flags);
+   spin_unlock(_domain_lock);
 
return ret;
 }
@@ -2296,16 +2292,15 @@ static void domain_context_clear_one(struct 
device_domain_info *info, u8 bus, u8
 {
struct intel_iommu *iommu = info->iommu;
struct context_entry *context;
-   unsigned long flags;
u16 did_old;
 
if (!iommu)
return;
 
-   spin_lock_irqsave(>lock, flags);
+   spin_lock(>lock);
context = iommu_context_addr(iommu, bus, devfn, 0);
if (!context) {
-   spin_unlock_irqrestore(>lock, flags);
+   spin_unlock(>lock);
return;
}
 
@@ -2320,7 +2315,7 @@ static void domain_context_clear_one(struct 
device_domain_info *info, u8 bus, u8
 
context_clear_entry(context);
__iommu_flush_cache(iommu, context, sizeof(*context));
-   spin_unlock_irqrestore(>lock, flags);
+   spin_unlock(>lock);
iommu->flush.flush_context(iommu,
   did_old,
   (((u16)bus) << 8) | devfn,
@@ -2342,12 +2337,11 @@ static void 

[PATCH v4 06/11] iommu/vt-d: Acquiring lock in domain ID allocation helpers

2022-07-05 Thread Lu Baolu
The iommu->lock is used to protect the per-IOMMU domain ID resource.
Moving the lock into the ID alloc/free helpers makes the code more
compact. At the same time, the device_domain_lock is irrelevant to
the domain ID resource, remove its assertion as well.

Signed-off-by: Lu Baolu 
Reviewed-by: Kevin Tian 
---
 drivers/iommu/intel/iommu.c | 24 +---
 1 file changed, 9 insertions(+), 15 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 93f01082dce1..7f03576e72d7 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -1779,16 +1779,13 @@ static struct dmar_domain *alloc_domain(unsigned int 
type)
return domain;
 }
 
-/* Must be called with iommu->lock */
 static int domain_attach_iommu(struct dmar_domain *domain,
   struct intel_iommu *iommu)
 {
unsigned long ndomains;
-   int num;
-
-   assert_spin_locked(_domain_lock);
-   assert_spin_locked(>lock);
+   int num, ret = 0;
 
+   spin_lock(>lock);
domain->iommu_refcnt[iommu->seq_id] += 1;
if (domain->iommu_refcnt[iommu->seq_id] == 1) {
ndomains = cap_ndoms(iommu->cap);
@@ -1797,7 +1794,8 @@ static int domain_attach_iommu(struct dmar_domain *domain,
if (num >= ndomains) {
pr_err("%s: No free domain ids\n", iommu->name);
domain->iommu_refcnt[iommu->seq_id] -= 1;
-   return -ENOSPC;
+   ret = -ENOSPC;
+   goto out_unlock;
}
 
set_bit(num, iommu->domain_ids);
@@ -1806,7 +1804,9 @@ static int domain_attach_iommu(struct dmar_domain *domain,
domain_update_iommu_cap(domain);
}
 
-   return 0;
+out_unlock:
+   spin_unlock(>lock);
+   return ret;
 }
 
 static void domain_detach_iommu(struct dmar_domain *domain,
@@ -1814,9 +1814,7 @@ static void domain_detach_iommu(struct dmar_domain 
*domain,
 {
int num;
 
-   assert_spin_locked(_domain_lock);
-   assert_spin_locked(>lock);
-
+   spin_lock(>lock);
domain->iommu_refcnt[iommu->seq_id] -= 1;
if (domain->iommu_refcnt[iommu->seq_id] == 0) {
num = domain->iommu_did[iommu->seq_id];
@@ -1824,6 +1822,7 @@ static void domain_detach_iommu(struct dmar_domain 
*domain,
domain_update_iommu_cap(domain);
domain->iommu_did[iommu->seq_id] = 0;
}
+   spin_unlock(>lock);
 }
 
 static inline int guestwidth_to_adjustwidth(int gaw)
@@ -2472,9 +2471,7 @@ static int domain_add_dev_info(struct dmar_domain 
*domain, struct device *dev)
 
spin_lock(_domain_lock);
info->domain = domain;
-   spin_lock(>lock);
ret = domain_attach_iommu(domain, iommu);
-   spin_unlock(>lock);
if (ret) {
spin_unlock(_domain_lock);
return ret;
@@ -4178,10 +4175,7 @@ static void __dmar_remove_one_dev_info(struct 
device_domain_info *info)
}
 
list_del(>link);
-
-   spin_lock(>lock);
domain_detach_iommu(domain, iommu);
-   spin_unlock(>lock);
 }
 
 static void dmar_remove_one_dev_info(struct device *dev)
-- 
2.25.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v4 04/11] iommu/vt-d: Unnecessary spinlock for root table alloc and free

2022-07-05 Thread Lu Baolu
The IOMMU root table is allocated and freed in the IOMMU initialization
code in static boot or hot-remove paths. There's no need for a spinlock.

Signed-off-by: Lu Baolu 
Reviewed-by: Kevin Tian 
---
 drivers/iommu/intel/iommu.c | 18 +-
 1 file changed, 5 insertions(+), 13 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 77915d61f7ec..ff49c9460ede 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -809,14 +809,12 @@ static int device_context_mapped(struct intel_iommu 
*iommu, u8 bus, u8 devfn)
 
 static void free_context_table(struct intel_iommu *iommu)
 {
-   int i;
-   unsigned long flags;
struct context_entry *context;
+   int i;
+
+   if (!iommu->root_entry)
+   return;
 
-   spin_lock_irqsave(>lock, flags);
-   if (!iommu->root_entry) {
-   goto out;
-   }
for (i = 0; i < ROOT_ENTRY_NR; i++) {
context = iommu_context_addr(iommu, i, 0, 0);
if (context)
@@ -828,12 +826,10 @@ static void free_context_table(struct intel_iommu *iommu)
context = iommu_context_addr(iommu, i, 0x80, 0);
if (context)
free_pgtable_page(context);
-
}
+
free_pgtable_page(iommu->root_entry);
iommu->root_entry = NULL;
-out:
-   spin_unlock_irqrestore(>lock, flags);
 }
 
 #ifdef CONFIG_DMAR_DEBUG
@@ -1232,7 +1228,6 @@ static void domain_unmap(struct dmar_domain *domain, 
unsigned long start_pfn,
 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
 {
struct root_entry *root;
-   unsigned long flags;
 
root = (struct root_entry *)alloc_pgtable_page(iommu->node);
if (!root) {
@@ -1242,10 +1237,7 @@ static int iommu_alloc_root_entry(struct intel_iommu 
*iommu)
}
 
__iommu_flush_cache(iommu, root, ROOT_SIZE);
-
-   spin_lock_irqsave(>lock, flags);
iommu->root_entry = root;
-   spin_unlock_irqrestore(>lock, flags);
 
return 0;
 }
-- 
2.25.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v4 02/11] iommu/vt-d: Remove clearing translation data in disable_dmar_iommu()

2022-07-05 Thread Lu Baolu
The disable_dmar_iommu() is called when IOMMU initialization fails or
the IOMMU is hot-removed from the system. In both cases, there is no
need to clear the IOMMU translation data structures for devices.

On the initialization path, the device probing only happens after the
IOMMU is initialized successfully, hence there're no translation data
structures.

On the hot-remove path, there is no real use case where the IOMMU is
hot-removed, but the devices that it manages are still alive in the
system. The translation data structures were torn down during device
release, hence there's no need to repeat it in IOMMU hot-remove path
either. This removes the unnecessary code and only leaves a check.

Signed-off-by: Lu Baolu 
---
 drivers/iommu/intel/pasid.h |  1 +
 drivers/iommu/intel/iommu.c | 21 +++--
 2 files changed, 8 insertions(+), 14 deletions(-)

diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h
index bf5b937848b4..20c54e50f533 100644
--- a/drivers/iommu/intel/pasid.h
+++ b/drivers/iommu/intel/pasid.h
@@ -39,6 +39,7 @@
  * only and pass-through transfer modes.
  */
 #define FLPT_DEFAULT_DID   1
+#define NUM_RESERVED_DID   2
 
 /*
  * The SUPERVISOR_MODE flag indicates a first level translation which
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 3b6571681bdd..43aaec5bdd84 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -1716,23 +1716,16 @@ static int iommu_init_domains(struct intel_iommu *iommu)
 
 static void disable_dmar_iommu(struct intel_iommu *iommu)
 {
-   struct device_domain_info *info, *tmp;
-   unsigned long flags;
-
if (!iommu->domain_ids)
return;
 
-   spin_lock_irqsave(_domain_lock, flags);
-   list_for_each_entry_safe(info, tmp, _domain_list, global) {
-   if (info->iommu != iommu)
-   continue;
-
-   if (!info->dev || !info->domain)
-   continue;
-
-   __dmar_remove_one_dev_info(info);
-   }
-   spin_unlock_irqrestore(_domain_lock, flags);
+   /*
+* All iommu domains must have been detached from the devices,
+* hence there should be no domain IDs in use.
+*/
+   if (WARN_ON(bitmap_weight(iommu->domain_ids, cap_ndoms(iommu->cap))
+   > NUM_RESERVED_DID))
+   return;
 
if (iommu->gcmd & DMA_GCMD_TE)
iommu_disable_translation(iommu);
-- 
2.25.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v4 03/11] iommu/vt-d: Use pci_get_domain_bus_and_slot() in pgtable_walk()

2022-07-05 Thread Lu Baolu
Use pci_get_domain_bus_and_slot() instead of searching the global list
to retrieve the pci device pointer. This also removes the global
device_domain_list as there isn't any consumer anymore.

Signed-off-by: Lu Baolu 
Reviewed-by: Jason Gunthorpe 
Reviewed-by: Kevin Tian 
---
 drivers/iommu/intel/iommu.h |  1 -
 drivers/iommu/intel/iommu.c | 34 ++
 2 files changed, 6 insertions(+), 29 deletions(-)

diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 8deb745d8b36..198c6c822ef4 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -609,7 +609,6 @@ struct intel_iommu {
 /* PCI domain-device relationship */
 struct device_domain_info {
struct list_head link;  /* link to domain siblings */
-   struct list_head global; /* link to global list */
u32 segment;/* PCI segment number */
u8 bus; /* PCI bus number */
u8 devfn;   /* PCI devfn number */
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 43aaec5bdd84..77915d61f7ec 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -131,8 +131,6 @@ static struct intel_iommu **g_iommus;
 
 static void __init check_tylersburg_isoch(void);
 static int rwbf_quirk;
-static inline struct device_domain_info *
-dmar_search_domain_by_dev_info(int segment, int bus, int devfn);
 
 /*
  * set to 1 to panic kernel if can't successfully enable VT-d
@@ -315,8 +313,6 @@ static int iommu_skip_te_disable;
 #define IDENTMAP_AZALIA4
 
 static DEFINE_SPINLOCK(device_domain_lock);
-static LIST_HEAD(device_domain_list);
-
 const struct iommu_ops intel_iommu_ops;
 
 static bool translation_pre_enabled(struct intel_iommu *iommu)
@@ -846,9 +842,14 @@ static void pgtable_walk(struct intel_iommu *iommu, 
unsigned long pfn, u8 bus, u
struct device_domain_info *info;
struct dma_pte *parent, *pte;
struct dmar_domain *domain;
+   struct pci_dev *pdev;
int offset, level;
 
-   info = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
+   pdev = pci_get_domain_bus_and_slot(iommu->segment, bus, devfn);
+   if (!pdev)
+   return;
+
+   info = dev_iommu_priv_get(>dev);
if (!info || !info->domain) {
pr_info("device [%02x:%02x.%d] not probed\n",
bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
@@ -2357,19 +2358,6 @@ static void domain_remove_dev_info(struct dmar_domain 
*domain)
spin_unlock_irqrestore(_domain_lock, flags);
 }
 
-static inline struct device_domain_info *
-dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
-{
-   struct device_domain_info *info;
-
-   list_for_each_entry(info, _domain_list, global)
-   if (info->segment == segment && info->bus == bus &&
-   info->devfn == devfn)
-   return info;
-
-   return NULL;
-}
-
 static int domain_setup_first_level(struct intel_iommu *iommu,
struct dmar_domain *domain,
struct device *dev,
@@ -4573,7 +4561,6 @@ static struct iommu_device 
*intel_iommu_probe_device(struct device *dev)
struct pci_dev *pdev = dev_is_pci(dev) ? to_pci_dev(dev) : NULL;
struct device_domain_info *info;
struct intel_iommu *iommu;
-   unsigned long flags;
u8 bus, devfn;
 
iommu = device_to_iommu(dev, , );
@@ -4616,10 +4603,7 @@ static struct iommu_device 
*intel_iommu_probe_device(struct device *dev)
}
}
 
-   spin_lock_irqsave(_domain_lock, flags);
-   list_add(>global, _domain_list);
dev_iommu_priv_set(dev, info);
-   spin_unlock_irqrestore(_domain_lock, flags);
 
return >iommu;
 }
@@ -4627,15 +4611,9 @@ static struct iommu_device 
*intel_iommu_probe_device(struct device *dev)
 static void intel_iommu_release_device(struct device *dev)
 {
struct device_domain_info *info = dev_iommu_priv_get(dev);
-   unsigned long flags;
 
dmar_remove_one_dev_info(dev);
-
-   spin_lock_irqsave(_domain_lock, flags);
dev_iommu_priv_set(dev, NULL);
-   list_del(>global);
-   spin_unlock_irqrestore(_domain_lock, flags);
-
kfree(info);
set_dma_ops(dev, NULL);
 }
-- 
2.25.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v4 00/11] iommu/vt-d: Optimize the use of locks

2022-07-05 Thread Lu Baolu
Hi folks,

This series tries to optimize the uses of two locks in the Intel IOMMU
driver:

- The intel_iommu::lock is used to protect the IOMMU resources shared by
  devices. They include the IOMMU root and context tables, the pasid
  tables and the domain IDs.
- The global device_domain_lock is used to protect the global and the
  per-domain device tracking lists.

The optimization includes:

- Remove the unnecessary global device tracking list;
- Remove unnecessary locking;
- Reduce the scope of the lock as much as possible, that is, use the
  lock only where necessary;
- The global lock is transformed into a local lock to improve the
  efficiency.

This series is also available on github:
https://github.com/LuBaolu/intel-iommu/commits/intel-iommu-lock-optimization-v4

Your comments and suggestions are very appreciated.

Best regards,
baolu

Change log:

v4:
 - Kevin pointed out that the copy table failure path needs some
   improvements. We agreed that it worth a separated series to handle
   all improvements on copy table.
 - Kevin suggested that we could put all spinlock replacement changes in
   a single patch. Updated in this series.
 - Others look good to Kevin. (Thanks and very appreciated for all the
   review comments.)

v3:
 - 
https://lore.kernel.org/linux-iommu/20220629074725.2331441-1-baolu...@linux.intel.com/
 - Split reduction of lock ranges from changing irqsave.
   
https://lore.kernel.org/linux-iommu/bn9pr11mb52760a3d7c6bf1af9c9d34658c...@bn9pr11mb5276.namprd11.prod.outlook.com/
 - Fully initialize the dev_info before adding it to the list.
   
https://lore.kernel.org/linux-iommu/bn9pr11mb52764d7cd86448c5e4eb46668c...@bn9pr11mb5276.namprd11.prod.outlook.com/
 - Various code and comments refinement.

v2:
 - 
https://lore.kernel.org/linux-iommu/20220614025137.1632762-1-baolu...@linux.intel.com/
 - Split the lock-free page walk issue into a new patch:
   
https://lore.kernel.org/linux-iommu/20220609070811.902868-1-baolu...@linux.intel.com/
 - Drop the conversion from spinlock to mutex and make this series
   cleanup purpose only.
 - Address several comments received during v1 review.

v1:
 - 
https://lore.kernel.org/linux-iommu/20220527063019.3112905-1-baolu...@linux.intel.com/
 - Initial post.

Lu Baolu (11):
  iommu/vt-d: debugfs: Remove device_domain_lock usage
  iommu/vt-d: Remove clearing translation data in disable_dmar_iommu()
  iommu/vt-d: Use pci_get_domain_bus_and_slot() in pgtable_walk()
  iommu/vt-d: Unnecessary spinlock for root table alloc and free
  iommu/vt-d: Replace spin_lock_irqsave() with spin_lock()
  iommu/vt-d: Acquiring lock in domain ID allocation helpers
  iommu/vt-d: Acquiring lock in pasid manipulation helpers
  iommu/vt-d: Check device list of domain in domain free path
  iommu/vt-d: Fold __dmar_remove_one_dev_info() into its caller
  iommu/vt-d: Use device_domain_lock accurately
  iommu/vt-d: Convert global spinlock into per domain lock

 drivers/iommu/intel/iommu.h   |   3 +-
 drivers/iommu/intel/pasid.h   |   1 +
 drivers/iommu/intel/debugfs.c |  49 ---
 drivers/iommu/intel/iommu.c   | 249 ++
 drivers/iommu/intel/pasid.c   | 103 +++---
 drivers/iommu/intel/svm.c |   5 +-
 6 files changed, 163 insertions(+), 247 deletions(-)

-- 
2.25.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v4 01/11] iommu/vt-d: debugfs: Remove device_domain_lock usage

2022-07-05 Thread Lu Baolu
The domain_translation_struct debugfs node is used to dump the DMAR page
tables for the PCI devices. It potentially races with setting domains to
devices. The existing code uses the global spinlock device_domain_lock to
avoid the races.

This removes the use of device_domain_lock outside of iommu.c by replacing
it with the group mutex lock. Using the group mutex lock is cleaner and
more compatible to following cleanups.

Signed-off-by: Lu Baolu 
Reviewed-by: Kevin Tian 
---
 drivers/iommu/intel/iommu.h   |  1 -
 drivers/iommu/intel/debugfs.c | 43 +--
 drivers/iommu/intel/iommu.c   |  2 +-
 3 files changed, 32 insertions(+), 14 deletions(-)

diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 8285fcfa5fea..8deb745d8b36 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -480,7 +480,6 @@ enum {
 #define VTD_FLAG_SVM_CAPABLE   (1 << 2)
 
 extern int intel_iommu_sm;
-extern spinlock_t device_domain_lock;
 
 #define sm_supported(iommu)(intel_iommu_sm && ecap_smts((iommu)->ecap))
 #define pasid_supported(iommu) (sm_supported(iommu) && \
diff --git a/drivers/iommu/intel/debugfs.c b/drivers/iommu/intel/debugfs.c
index d927ef10641b..6e1a3f88abc8 100644
--- a/drivers/iommu/intel/debugfs.c
+++ b/drivers/iommu/intel/debugfs.c
@@ -342,13 +342,13 @@ static void pgtable_walk_level(struct seq_file *m, struct 
dma_pte *pde,
}
 }
 
-static int show_device_domain_translation(struct device *dev, void *data)
+static int __show_device_domain_translation(struct device *dev, void *data)
 {
-   struct device_domain_info *info = dev_iommu_priv_get(dev);
-   struct dmar_domain *domain = info->domain;
+   struct dmar_domain *domain;
struct seq_file *m = data;
u64 path[6] = { 0 };
 
+   domain = to_dmar_domain(iommu_get_domain_for_dev(dev));
if (!domain)
return 0;
 
@@ -359,20 +359,39 @@ static int show_device_domain_translation(struct device 
*dev, void *data)
pgtable_walk_level(m, domain->pgd, domain->agaw + 2, 0, path);
seq_putc(m, '\n');
 
-   return 0;
+   /* Don't iterate */
+   return 1;
 }
 
-static int domain_translation_struct_show(struct seq_file *m, void *unused)
+static int show_device_domain_translation(struct device *dev, void *data)
 {
-   unsigned long flags;
-   int ret;
+   struct iommu_group *group;
 
-   spin_lock_irqsave(_domain_lock, flags);
-   ret = bus_for_each_dev(_bus_type, NULL, m,
-  show_device_domain_translation);
-   spin_unlock_irqrestore(_domain_lock, flags);
+   group = iommu_group_get(dev);
+   if (group) {
+   /*
+* The group->mutex is held across the callback, which will
+* block calls to iommu_attach/detach_group/device. Hence,
+* the domain of the device will not change during traversal.
+*
+* All devices in an iommu group share a single domain, hence
+* we only dump the domain of the first device. Even though,
+* this code still possibly races with the iommu_unmap()
+* interface. This could be solved by RCU-freeing the page
+* table pages in the iommu_unmap() path.
+*/
+   iommu_group_for_each_dev(group, data,
+__show_device_domain_translation);
+   iommu_group_put(group);
+   }
 
-   return ret;
+   return 0;
+}
+
+static int domain_translation_struct_show(struct seq_file *m, void *unused)
+{
+   return bus_for_each_dev(_bus_type, NULL, m,
+   show_device_domain_translation);
 }
 DEFINE_SHOW_ATTRIBUTE(domain_translation_struct);
 
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 10bda4bec8fe..3b6571681bdd 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -314,7 +314,7 @@ static int iommu_skip_te_disable;
 #define IDENTMAP_GFX   2
 #define IDENTMAP_AZALIA4
 
-DEFINE_SPINLOCK(device_domain_lock);
+static DEFINE_SPINLOCK(device_domain_lock);
 static LIST_HEAD(device_domain_list);
 
 const struct iommu_ops intel_iommu_ops;
-- 
2.25.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[linux-next:master] BUILD REGRESSION 2a2aa3f05338270aecbe2492fda910d6c17e0102

2022-07-05 Thread kernel test robot
tree/branch: 
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git master
branch HEAD: 2a2aa3f05338270aecbe2492fda910d6c17e0102  Add linux-next specific 
files for 20220705

Error/Warning reports:

https://lore.kernel.org/linux-doc/202207051821.3f0erisl-...@intel.com

Error/Warning: (recently discovered and may have been fixed)

Documentation/PCI/endpoint/pci-vntb-function.rst:82: WARNING: Unexpected 
indentation.
Documentation/PCI/endpoint/pci-vntb-howto.rst:131: WARNING: Title underline too 
short.
drivers/pci/endpoint/functions/pci-epf-vntb.c:975:5: warning: no previous 
prototype for 'pci_read' [-Wmissing-prototypes]
drivers/pci/endpoint/functions/pci-epf-vntb.c:984:5: warning: no previous 
prototype for 'pci_write' [-Wmissing-prototypes]

Unverified Error/Warning (likely false positive, please contact us if 
interested):

block/partitions/efi.c:223:1: internal compiler error: in arc_ifcvt, at 
config/arc/arc.c:9637
block/sed-opal.c:427:1: internal compiler error: in arc_ifcvt, at 
config/arc/arc.c:9637
crypto/asymmetric_keys/pkcs7_verify.c:311:1: internal compiler error: in 
arc_ifcvt, at config/arc/arc.c:9637
drivers/ata/libata-core.c:2802:1: internal compiler error: in arc_ifcvt, at 
config/arc/arc.c:9637
drivers/ata/libata-eh.c:2842:1: internal compiler error: in arc_ifcvt, at 
config/arc/arc.c:9637
drivers/ata/sata_dwc_460ex.c:691:1: internal compiler error: in arc_ifcvt, at 
config/arc/arc.c:9637
drivers/base/power/runtime.c:1570:1: internal compiler error: in arc_ifcvt, at 
config/arc/arc.c:9637
drivers/block/rbd.c:6142:1: internal compiler error: in arc_ifcvt, at 
config/arc/arc.c:9637
drivers/bluetooth/hci_ll.c:588:1: internal compiler error: in arc_ifcvt, at 
config/arc/arc.c:9637
drivers/bluetooth/hci_qca.c:2137:1: internal compiler error: in arc_ifcvt, at 
config/arc/arc.c:9637
drivers/cdrom/cdrom.c:1041:1: internal compiler error: in arc_ifcvt, at 
config/arc/arc.c:9637
drivers/char/ipmi/ipmi_ssif.c:1918:1: internal compiler error: in arc_ifcvt, at 
config/arc/arc.c:9637
drivers/char/pcmcia/cm4000_cs.c:922:1: internal compiler error: in arc_ifcvt, 
at config/arc/arc.c:9637
drivers/char/random.c:869:1: internal compiler error: in arc_ifcvt, at 
config/arc/arc.c:9637
drivers/char/tpm/tpm_tis_core.c:1122:1: internal compiler error: in arc_ifcvt, 
at config/arc/arc.c:9637
drivers/clk/bcm/clk-iproc-armpll.c:139:1: internal compiler error: in 
arc_ifcvt, at config/arc/arc.c:9637
drivers/clk/clk-bd718x7.c:50:1: internal compiler error: in arc_ifcvt, at 
config/arc/arc.c:9637
drivers/clk/clk-lochnagar.c:187:1: internal compiler error: in arc_ifcvt, at 
config/arc/arc.c:9637
drivers/crypto/ccree/cc_request_mgr.c:206:1: internal compiler error: in 
arc_ifcvt, at config/arc/arc.c:9637
drivers/crypto/qce/sha.c:73:1: internal compiler error: in arc_ifcvt, at 
config/arc/arc.c:9637
drivers/crypto/qce/skcipher.c:61:1: internal compiler error: in arc_ifcvt, at 
config/arc/arc.c:9637
drivers/cxl/core/hdm.c:38:1: internal compiler error: in arc_ifcvt, at 
config/arc/arc.c:9637
drivers/cxl/core/pci.c:67:1: internal compiler error: in arc_ifcvt, at 
config/arc/arc.c:9637
drivers/dma-buf/dma-buf.c:1100:1: internal compiler error: in arc_ifcvt, at 
config/arc/arc.c:9637
drivers/firmware/arm_scmi/bus.c:152:1: internal compiler error: in arc_ifcvt, 
at config/arc/arc.c:9637
drivers/firmware/arm_scmi/clock.c:394:1: internal compiler error: in arc_ifcvt, 
at config/arc/arc.c:9637
drivers/firmware/arm_scmi/powercap.c:376:1: internal compiler error: in 
arc_ifcvt, at config/arc/arc.c:9637
drivers/firmware/arm_scmi/sensors.c:673:1: internal compiler error: in 
arc_ifcvt, at config/arc/arc.c:9637
drivers/firmware/arm_scmi/voltage.c:363:1: internal compiler error: in 
arc_ifcvt, at config/arc/arc.c:9637
drivers/fpga/dfl-fme-mgr.c:163:1: internal compiler error: in arc_ifcvt, at 
config/arc/arc.c:9637
drivers/gnss/usb.c:68:1: internal compiler error: in arc_ifcvt, at 
config/arc/arc.c:9637
drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_debug.c:175:1: internal 
compiler error: in arc_ifcvt, at config/arc/arc.c:9637
drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link_dp.c:1006:1: internal 
compiler error: in arc_ifcvt, at config/arc/arc.c:9637
drivers/gpu/drm/amd/amdgpu/../display/dc/dce110/dce110_resource.c:1035:1: 
internal compiler error: in arc_ifcvt, at config/arc/arc.c:9637
drivers/gpu/drm/amd/amdgpu/../display/dc/dce112/dce112_resource.c:955:1: 
internal compiler error: in arc_ifcvt, at config/arc/arc.c:9637
drivers/gpu/drm/amd/amdgpu/../pm/powerplay/hwmgr/smu7_hwmgr.c:3895:1: internal 
compiler error: in arc_ifcvt, at config/arc/arc.c:9637
drivers/gpu/drm/amd/amdgpu/../pm/powerplay/hwmgr/smu8_hwmgr.c:754:1: internal 
compiler error: in arc_ifcvt, at config/arc/arc.c:9637
drivers/gpu/drm/amd/amdgpu/../pm/powerplay/hwmgr/vega10_powertune.c:1214:1: 
internal compiler error: in arc_ifcvt, at config/arc/arc.c:9637
drivers/gpu/drm/amd/amdgpu/../pm/powerplay/smumgr/smu7_smumgr.c:195:1: internal

Re: [PATCH v1 03/16] dt-bindings: power: mediatek: Refine multiple level power domain nodes

2022-07-05 Thread Rob Herring
On Mon, Jul 04, 2022 at 06:00:15PM +0800, Tinghan Shen wrote:
> Extract duplicated properties and support more levels of power
> domain nodes.
> 
> This change fix following error when do dtbs_check,
> arch/arm64/boot/dts/mediatek/mt8195-evb.dtb: power-controller: 
> power-domain@15:power-domain@16:power-domain@18: 'power-domain@19', 
> 'power-domain@20', 'power-domain@21' do not match any of the regexes: 
> 'pinctrl-[0-9]+'
>From schema: 
> Documentation/devicetree/bindings/power/mediatek,power-controller.yaml
> 
> Signed-off-by: Tinghan Shen 
> ---
>  .../power/mediatek,power-controller.yaml  | 132 ++
>  1 file changed, 12 insertions(+), 120 deletions(-)
> 
> diff --git 
> a/Documentation/devicetree/bindings/power/mediatek,power-controller.yaml 
> b/Documentation/devicetree/bindings/power/mediatek,power-controller.yaml
> index 135c6f722091..09a537a802b8 100644
> --- a/Documentation/devicetree/bindings/power/mediatek,power-controller.yaml
> +++ b/Documentation/devicetree/bindings/power/mediatek,power-controller.yaml
> @@ -39,8 +39,17 @@ properties:
>'#size-cells':
>  const: 0
>  
> +required:
> +  - compatible
> +
> +additionalProperties: false
> +
>  patternProperties:
>"^power-domain@[0-9a-f]+$":
> +$ref: "#/$defs/power-domain-node"
> +
> +$defs:
> +  power-domain-node:
>  type: object
>  description: |
>Represents the power domains within the power controller node as 
> documented
> @@ -98,127 +107,10 @@ patternProperties:
>  $ref: /schemas/types.yaml#/definitions/phandle
>  description: phandle to the device containing the SMI register range.
>  
> -patternProperties:
> -  "^power-domain@[0-9a-f]+$":
> -type: object
> -description: |
> -  Represents a power domain child within a power domain parent node.
> -
> -properties:
> -
> -  '#power-domain-cells':
> -description:
> -  Must be 0 for nodes representing a single PM domain and 1 for 
> nodes
> -  providing multiple PM domains.
> -
> -  '#address-cells':
> -const: 1
> -
> -  '#size-cells':
> -const: 0
> -
> -  reg:
> -maxItems: 1
> -
> -  clocks:
> -description: |
> -  A number of phandles to clocks that need to be enabled during 
> domain
> -  power-up sequencing.
> -
> -  clock-names:
> -description: |
> -  List of names of clocks, in order to match the power-up 
> sequencing
> -  for each power domain we need to group the clocks by name. 
> BASIC
> -  clocks need to be enabled before enabling the corresponding 
> power
> -  domain, and should not have a '-' in their name (i.e mm, mfg, 
> venc).
> -  SUSBYS clocks need to be enabled before releasing the bus 
> protection,
> -  and should contain a '-' in their name (i.e mm-0, isp-0, 
> cam-0).
> -
> -  In order to follow properly the power-up sequencing, the 
> clocks must
> -  be specified by order, adding first the BASIC clocks followed 
> by the
> -  SUSBSYS clocks.
> -
> -  domain-supply:
> -description: domain regulator supply.
> -
> -  mediatek,infracfg:
> -$ref: /schemas/types.yaml#/definitions/phandle
> -description: phandle to the device containing the INFRACFG 
> register range.
> -
> -  mediatek,smi:
> -$ref: /schemas/types.yaml#/definitions/phandle
> -description: phandle to the device containing the SMI register 
> range.
> -
> -patternProperties:
> -  "^power-domain@[0-9a-f]+$":
> -type: object
> -description: |
> -  Represents a power domain child within a power domain parent 
> node.
> -
> -properties:
> +  required:
> +- reg
>  
> -  '#power-domain-cells':
> -description:
> -  Must be 0 for nodes representing a single PM domain and 1 
> for nodes
> -  providing multiple PM domains.
> -
> -  '#address-cells':
> -const: 1
> -
> -  '#size-cells':
> -const: 0
> -
> -  reg:
> -maxItems: 1
> -
> -  clocks:
> -description: |
> -  A number of phandles to clocks that need to be enabled 
> during domain
> -  power-up sequencing.
> -
> -  clock-names:
> -description: |
> -  List of names of clocks, in order to match the power-up 
> sequencing
> -  for each power domain we need to group the clocks by name. 
> BASIC
> -  clocks need to be enabled before enabling the 
> corresponding power
> -  domain, and should not have a '-' in their name (i.e mm, 
> mfg, 

Re: [PATCH v1 01/16] dt-bindings: iommu: mediatek: Increase max interrupt number

2022-07-05 Thread Rob Herring
On Mon, Jul 04, 2022 at 06:00:13PM +0800, Tinghan Shen wrote:
> mt8195 infra iommu has max 5 interrupts.
> 
> Signed-off-by: Tinghan Shen 
> ---
>  .../devicetree/bindings/iommu/mediatek,iommu.yaml| 12 +++-
>  1 file changed, 11 insertions(+), 1 deletion(-)
> 
> diff --git a/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml 
> b/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml
> index 2ae3bbad7f1a..27eb9f6aa3ce 100644
> --- a/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml
> +++ b/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml
> @@ -91,7 +91,8 @@ properties:
>  maxItems: 1
>  
>interrupts:
> -maxItems: 1
> +minItems: 1
> +maxItems: 5
>  
>clocks:
>  items:
> @@ -175,9 +176,18 @@ allOf:
>const: mediatek,mt8195-iommu-infra
>  
>  then:
> +  properties:
> +interrupts:
> +  maxItems: 1
> +
>required:
>  - mediatek,larbs
>  
> +else:
> +  properties:
> +interrupts:
> +  maxItems: 5

5 is already the max.

minItems: 5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v7] dt-bindings: reserved-memory: Document iommu-addresses

2022-07-05 Thread Rob Herring
On Tue, 05 Jul 2022 15:06:52 +0200, Thierry Reding wrote:
> From: Thierry Reding 
> 
> This adds the "iommu-addresses" property to reserved-memory nodes, which
> allow describing the interaction of memory regions with IOMMUs. Two use-
> cases are supported:
> 
>   1. Static mappings can be described by pairing the "iommu-addresses"
>  property with a "reg" property. This is mostly useful for adopting
>  firmware-allocated buffers via identity mappings. One common use-
>  case where this is required is if early firmware or bootloaders
>  have set up a bootsplash framebuffer that a display controller is
>  actively scanning out from during the operating system boot
>  process.
> 
>   2. If an "iommu-addresses" property exists without a "reg" property,
>  the reserved-memory node describes an IOVA reservation. Such memory
>  regions are excluded from the IOVA space available to operating
>  system drivers and can be used for regions that must not be used to
>  map arbitrary buffers.
> 
> Each mapping or reservation is tied to a specific device via a phandle
> to the device's device tree node. This allows a reserved-memory region
> to be reused across multiple devices.
> 
> Signed-off-by: Thierry Reding 
> ---
> Changes in v7:
> - keep reserved-memory.txt to avoid broken references
> 
> Changes in v6:
> - add device phandle to iommu-addresses property in examples
> - remove now unused dt-bindings/reserved-memory.h header
> ---
>  .../reserved-memory/reserved-memory.yaml  | 62 +++
>  1 file changed, 62 insertions(+)
> 

My bot found errors running 'make DT_CHECKER_FLAGS=-m dt_binding_check'
on your patch (DT_CHECKER_FLAGS is new in v5.13):

yamllint warnings/errors:

dtschema/dtc warnings/errors:
Documentation/devicetree/bindings/reserved-memory/reserved-memory.example.dts:22.7-45:
 Warning (reg_format): /reserved-memory/framebuffer@9000:reg: property has 
invalid length (16 bytes) (#address-cells == 2, #size-cells == 1)
Documentation/devicetree/bindings/reserved-memory/reserved-memory.example.dts:29.7-40:
 Warning (reg_format): /bus@0/adsp@299:reg: property has invalid length (16 
bytes) (#address-cells == 2, #size-cells == 1)
Documentation/devicetree/bindings/reserved-memory/reserved-memory.example.dts:34.7-42:
 Warning (reg_format): /bus@0/display@1520:reg: property has invalid length 
(16 bytes) (#address-cells == 2, #size-cells == 1)
Documentation/devicetree/bindings/reserved-memory/reserved-memory.example.dts:9.5-12:
 Warning (ranges_format): /reserved-memory:ranges: empty "ranges" property but 
its #size-cells (1) differs from / (2)
Documentation/devicetree/bindings/reserved-memory/reserved-memory.example.dts:27.9-37.5:
 Warning (unit_address_vs_reg): /bus@0: node has a unit name, but no reg or 
ranges property
Documentation/devicetree/bindings/reserved-memory/reserved-memory.example.dtb: 
Warning (pci_device_reg): Failed prerequisite 'reg_format'
Documentation/devicetree/bindings/reserved-memory/reserved-memory.example.dtb: 
Warning (pci_device_bus_num): Failed prerequisite 'reg_format'
Documentation/devicetree/bindings/reserved-memory/reserved-memory.example.dtb: 
Warning (simple_bus_reg): Failed prerequisite 'reg_format'
Documentation/devicetree/bindings/reserved-memory/reserved-memory.example.dtb: 
Warning (i2c_bus_reg): Failed prerequisite 'reg_format'
Documentation/devicetree/bindings/reserved-memory/reserved-memory.example.dtb: 
Warning (spi_bus_reg): Failed prerequisite 'reg_format'
Documentation/devicetree/bindings/reserved-memory/reserved-memory.example.dts:21.30-24.7:
 Warning (avoid_default_addr_size): /reserved-memory/framebuffer@9000: 
Relying on default #address-cells value
Documentation/devicetree/bindings/reserved-memory/reserved-memory.example.dts:21.30-24.7:
 Warning (avoid_default_addr_size): /reserved-memory/framebuffer@9000: 
Relying on default #size-cells value
Documentation/devicetree/bindings/reserved-memory/reserved-memory.example.dts:28.24-31.7:
 Warning (avoid_default_addr_size): /bus@0/adsp@299: Relying on default 
#address-cells value
Documentation/devicetree/bindings/reserved-memory/reserved-memory.example.dts:28.24-31.7:
 Warning (avoid_default_addr_size): /bus@0/adsp@299: Relying on default 
#size-cells value
Documentation/devicetree/bindings/reserved-memory/reserved-memory.example.dts:33.27-36.7:
 Warning (avoid_default_addr_size): /bus@0/display@1520: Relying on default 
#address-cells value
Documentation/devicetree/bindings/reserved-memory/reserved-memory.example.dts:33.27-36.7:
 Warning (avoid_default_addr_size): /bus@0/display@1520: Relying on default 
#size-cells value
Documentation/devicetree/bindings/reserved-memory/reserved-memory.example.dtb: 
Warning (unique_unit_address_if_enabled): Failed prerequisite 
'avoid_default_addr_size'
/builds/robherring/linux-dt-review/Documentation/devicetree/bindings/reserved-memory/reserved-memory.example.dtb:
 /: bus@0: 'anyOf' 

Re: [PATCH v2] ACPI: VIOT: Fix ACS setup

2022-07-05 Thread Rafael J. Wysocki
On Thu, Jun 30, 2022 at 11:59 AM Jean-Philippe Brucker
 wrote:
>
> On Thu, Jun 30, 2022 at 11:40:59AM +0200, Eric Auger wrote:
> > Currently acpi_viot_init() gets called after the pci
> > device has been scanned and pci_enable_acs() has been called.
> > So pci_request_acs() fails to be taken into account leading
> > to wrong single iommu group topologies when dealing with
> > multi-function root ports for instance.
> >
> > We cannot simply move the acpi_viot_init() earlier, similarly
> > as the IORT init because the VIOT parsing relies on the pci
> > scan. However we can detect VIOT is present earlier and in
> > such a case, request ACS. Introduce a new acpi_viot_early_init()
> > routine that allows to call pci_request_acs() before the scan.
> >
> > While at it, guard the call to pci_request_acs() with #ifdef
> > CONFIG_PCI.
> >
> > Fixes: 3cf485540e7b ("ACPI: Add driver for the VIOT table")
> > Signed-off-by: Eric Auger 
> > Reported-by: Jin Liu 
>
> Reviewed-by: Jean-Philippe Brucker 
> Tested-by: Jean-Philippe Brucker 

Applied as 5.20 material, thanks!
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v7 20/21] PCI/P2PDMA: Introduce pci_mmap_p2pmem()

2022-07-05 Thread Logan Gunthorpe



On 2022-07-05 11:42, Greg Kroah-Hartman wrote:
> On Tue, Jul 05, 2022 at 11:32:23AM -0600, Logan Gunthorpe wrote:
>>
>>
>> On 2022-07-05 11:21, Greg Kroah-Hartman wrote:
>>> On Tue, Jul 05, 2022 at 06:50:39PM +0200, Christoph Hellwig wrote:
 [note for the newcomers, this is about allowing mmap()ing the PCIe
 P2P memory from the generic PCI P2P code through sysfs, and more
 importantly how to revoke it on device removal]
>>>
>>> We allow mmap on PCIe config space today, right?  Why is this different
>>> from what pci_create_legacy_files() does today?
>>>
 On Tue, Jul 05, 2022 at 10:44:49AM -0600, Logan Gunthorpe wrote:
> We might be able to. I'm not sure. I'll have to figure out how to find
> that inode from the p2pdma code. I haven't found an obvious interface to
> do that.

 I think the right way to approach this would be a new sysfs API
 that internally calls unmap_mapping_range internally instead of
 exposing the inode. I suspect that might actually be the right thing
 to do for iomem_inode as well.
>>>
>>> Why do we need something new and how is this any different from the PCI
>>> binary files I mention above?  We have supported PCI hotplug for a very
>>> long time, do the current PCI binary sysfs files not work properly with
>>> mmap and removing a device?
>>
>> The P2PDMA code allocates and hands out struct pages to userspace that
>> are backed with ZONE_DEVICE memory from a device's BAR. This is quite
>> different from the existing binary files mentioned above which neither
>> support struct pages nor allocation.
> 
> Why would you want to do this through a sysfs interface?  that feels
> horrid...

The current version does it through a char device, but that requires
creating a simple_fs and anon_inode for teardown on driver removal, plus
a bunch of hooks through the driver that exposes it (NVMe, in this case)
to set this all up.

Christoph is suggesting a sysfs interface which could potentially avoid
the anon_inode and all of the extra hooks. It has some significant
benefits and maybe some small downsides, but I wouldn't describe it as
horrid.

Logan
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v7 20/21] PCI/P2PDMA: Introduce pci_mmap_p2pmem()

2022-07-05 Thread Greg Kroah-Hartman
On Tue, Jul 05, 2022 at 11:32:23AM -0600, Logan Gunthorpe wrote:
> 
> 
> On 2022-07-05 11:21, Greg Kroah-Hartman wrote:
> > On Tue, Jul 05, 2022 at 06:50:39PM +0200, Christoph Hellwig wrote:
> >> [note for the newcomers, this is about allowing mmap()ing the PCIe
> >> P2P memory from the generic PCI P2P code through sysfs, and more
> >> importantly how to revoke it on device removal]
> > 
> > We allow mmap on PCIe config space today, right?  Why is this different
> > from what pci_create_legacy_files() does today?
> > 
> >> On Tue, Jul 05, 2022 at 10:44:49AM -0600, Logan Gunthorpe wrote:
> >>> We might be able to. I'm not sure. I'll have to figure out how to find
> >>> that inode from the p2pdma code. I haven't found an obvious interface to
> >>> do that.
> >>
> >> I think the right way to approach this would be a new sysfs API
> >> that internally calls unmap_mapping_range internally instead of
> >> exposing the inode. I suspect that might actually be the right thing
> >> to do for iomem_inode as well.
> > 
> > Why do we need something new and how is this any different from the PCI
> > binary files I mention above?  We have supported PCI hotplug for a very
> > long time, do the current PCI binary sysfs files not work properly with
> > mmap and removing a device?
> 
> The P2PDMA code allocates and hands out struct pages to userspace that
> are backed with ZONE_DEVICE memory from a device's BAR. This is quite
> different from the existing binary files mentioned above which neither
> support struct pages nor allocation.

Why would you want to do this through a sysfs interface?  that feels
horrid...

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v7 20/21] PCI/P2PDMA: Introduce pci_mmap_p2pmem()

2022-07-05 Thread Logan Gunthorpe



On 2022-07-05 11:21, Greg Kroah-Hartman wrote:
> On Tue, Jul 05, 2022 at 06:50:39PM +0200, Christoph Hellwig wrote:
>> [note for the newcomers, this is about allowing mmap()ing the PCIe
>> P2P memory from the generic PCI P2P code through sysfs, and more
>> importantly how to revoke it on device removal]
> 
> We allow mmap on PCIe config space today, right?  Why is this different
> from what pci_create_legacy_files() does today?
> 
>> On Tue, Jul 05, 2022 at 10:44:49AM -0600, Logan Gunthorpe wrote:
>>> We might be able to. I'm not sure. I'll have to figure out how to find
>>> that inode from the p2pdma code. I haven't found an obvious interface to
>>> do that.
>>
>> I think the right way to approach this would be a new sysfs API
>> that internally calls unmap_mapping_range internally instead of
>> exposing the inode. I suspect that might actually be the right thing
>> to do for iomem_inode as well.
> 
> Why do we need something new and how is this any different from the PCI
> binary files I mention above?  We have supported PCI hotplug for a very
> long time, do the current PCI binary sysfs files not work properly with
> mmap and removing a device?

The P2PDMA code allocates and hands out struct pages to userspace that
are backed with ZONE_DEVICE memory from a device's BAR. This is quite
different from the existing binary files mentioned above which neither
support struct pages nor allocation.

Logan
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v7 20/21] PCI/P2PDMA: Introduce pci_mmap_p2pmem()

2022-07-05 Thread Greg Kroah-Hartman
On Tue, Jul 05, 2022 at 06:50:39PM +0200, Christoph Hellwig wrote:
> [note for the newcomers, this is about allowing mmap()ing the PCIe
> P2P memory from the generic PCI P2P code through sysfs, and more
> importantly how to revoke it on device removal]

We allow mmap on PCIe config space today, right?  Why is this different
from what pci_create_legacy_files() does today?

> On Tue, Jul 05, 2022 at 10:44:49AM -0600, Logan Gunthorpe wrote:
> > We might be able to. I'm not sure. I'll have to figure out how to find
> > that inode from the p2pdma code. I haven't found an obvious interface to
> > do that.
> 
> I think the right way to approach this would be a new sysfs API
> that internally calls unmap_mapping_range internally instead of
> exposing the inode. I suspect that might actually be the right thing
> to do for iomem_inode as well.

Why do we need something new and how is this any different from the PCI
binary files I mention above?  We have supported PCI hotplug for a very
long time, do the current PCI binary sysfs files not work properly with
mmap and removing a device?

thanks,

greg k-h
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v7 20/21] PCI/P2PDMA: Introduce pci_mmap_p2pmem()

2022-07-05 Thread Christoph Hellwig
[note for the newcomers, this is about allowing mmap()ing the PCIe
P2P memory from the generic PCI P2P code through sysfs, and more
importantly how to revoke it on device removal]

On Tue, Jul 05, 2022 at 10:44:49AM -0600, Logan Gunthorpe wrote:
> We might be able to. I'm not sure. I'll have to figure out how to find
> that inode from the p2pdma code. I haven't found an obvious interface to
> do that.

I think the right way to approach this would be a new sysfs API
that internally calls unmap_mapping_range internally instead of
exposing the inode. I suspect that might actually be the right thing
to do for iomem_inode as well.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v7 20/21] PCI/P2PDMA: Introduce pci_mmap_p2pmem()

2022-07-05 Thread Logan Gunthorpe



On 2022-07-05 10:43, Christoph Hellwig wrote:
> On Tue, Jul 05, 2022 at 10:41:52AM -0600, Logan Gunthorpe wrote:
>> Using sysfs means we don't need all the messy callbacks from the nvme
>> driver, which is a plus. But I'm not sure how we'd get or unmap the
>> mapping of a sysfs file or avoid the anonymous inode. Seems with the
>> existing PCI resources, it uses an bin_attribute->f_mapping() callback
>> to pass back the iomem_get_mapping() mapping on file open.
>> revoke_iomem() is then used to nuke the VMAs. I don't think we can use
>> the same infrastructure here as that would add a dependency on
>> CONFIG_IO_STRICT_DEVMEM; which would be odd. And I'm not sure whether
>> there is a better way.
> 
> Why can't we do the revoke on the actual sysfs inode?

We might be able to. I'm not sure. I'll have to figure out how to find
that inode from the p2pdma code. I haven't found an obvious interface to
do that.

Logan
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v7 20/21] PCI/P2PDMA: Introduce pci_mmap_p2pmem()

2022-07-05 Thread Christoph Hellwig
On Tue, Jul 05, 2022 at 10:41:52AM -0600, Logan Gunthorpe wrote:
> Using sysfs means we don't need all the messy callbacks from the nvme
> driver, which is a plus. But I'm not sure how we'd get or unmap the
> mapping of a sysfs file or avoid the anonymous inode. Seems with the
> existing PCI resources, it uses an bin_attribute->f_mapping() callback
> to pass back the iomem_get_mapping() mapping on file open.
> revoke_iomem() is then used to nuke the VMAs. I don't think we can use
> the same infrastructure here as that would add a dependency on
> CONFIG_IO_STRICT_DEVMEM; which would be odd. And I'm not sure whether
> there is a better way.

Why can't we do the revoke on the actual sysfs inode?
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v7 20/21] PCI/P2PDMA: Introduce pci_mmap_p2pmem()

2022-07-05 Thread Logan Gunthorpe



On 2022-07-05 10:12, Christoph Hellwig wrote:
> On Tue, Jul 05, 2022 at 10:51:02AM -0300, Jason Gunthorpe wrote:
>>> In fact I'm not even sure this should be a character device, it seems
>>> to fit it way better with the PCI sysfs hierchacy, just like how we
>>> map MMIO resources, which these are anyway.  And once it is on sysfs
>>> we do have a uniqueue inode and need none of the pseudofs stuff, and
>>> don't need all the glue code in nvme either.
>>
>> Shouldn't there be an allocator here? It feels a bit weird that the
>> entire CMB is given to a single process, it is a sharable resource,
>> isn't it?
> 
> Making the entire area given by the device to the p2p allocator available
> to user space seems sensible to me.  That is what the current series does,
> and what a sysfs interface would do as well.

Yes, I think Jason is assuming the sysfs file would behave like the
existing mmio resource files where the process doing the mapping
specifies the offset and length into the BAR. That is not what we want
here, but I don't see why I don't see why we can't do the same thing in
sysfs as we do with the char device with a bin_attribute->mmap() callback.

mmapping the char device was convenient in user space, but it's not much
more work to dig through sysfs and mmap an attribute from there.

Using sysfs means we don't need all the messy callbacks from the nvme
driver, which is a plus. But I'm not sure how we'd get or unmap the
mapping of a sysfs file or avoid the anonymous inode. Seems with the
existing PCI resources, it uses an bin_attribute->f_mapping() callback
to pass back the iomem_get_mapping() mapping on file open.
revoke_iomem() is then used to nuke the VMAs. I don't think we can use
the same infrastructure here as that would add a dependency on
CONFIG_IO_STRICT_DEVMEM; which would be odd. And I'm not sure whether
there is a better way.

Logan
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v7 20/21] PCI/P2PDMA: Introduce pci_mmap_p2pmem()

2022-07-05 Thread Christoph Hellwig
On Tue, Jul 05, 2022 at 01:29:59PM -0300, Jason Gunthorpe wrote:
> > Making the entire area given by the device to the p2p allocator available
> > to user space seems sensible to me.  That is what the current series does,
> > and what a sysfs interface would do as well.
> 
> That makes openning the mmap exclusive with the in-kernel allocator -
> so it means opening the mmap fails if something else is using a P2P
> page and once the mmap is open all kernel side P2P allocations will
> fail?

No.  Just as in the current patchset you can mmap the file and will get
len / PAGE_SIZE pages from the per-device p2pdma pool, or the mmap will
fail if none are available.  A kernel consumer (or multiple) can use
other pages in the pool at the same time.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v7 20/21] PCI/P2PDMA: Introduce pci_mmap_p2pmem()

2022-07-05 Thread Jason Gunthorpe
On Tue, Jul 05, 2022 at 06:12:40PM +0200, Christoph Hellwig wrote:
> On Tue, Jul 05, 2022 at 10:51:02AM -0300, Jason Gunthorpe wrote:
> > > In fact I'm not even sure this should be a character device, it seems
> > > to fit it way better with the PCI sysfs hierchacy, just like how we
> > > map MMIO resources, which these are anyway.  And once it is on sysfs
> > > we do have a uniqueue inode and need none of the pseudofs stuff, and
> > > don't need all the glue code in nvme either.
> > 
> > Shouldn't there be an allocator here? It feels a bit weird that the
> > entire CMB is given to a single process, it is a sharable resource,
> > isn't it?
> 
> Making the entire area given by the device to the p2p allocator available
> to user space seems sensible to me.  That is what the current series does,
> and what a sysfs interface would do as well.

That makes openning the mmap exclusive with the in-kernel allocator -
so it means opening the mmap fails if something else is using a P2P
page and once the mmap is open all kernel side P2P allocations will
fail?

Which seems inelegant, I would expect the the mmap operation to
request some pages from the P2P allocator and provide them to
userspace so user and kernel workflows can co-exist using the same
CMB.

Jason
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v7 20/21] PCI/P2PDMA: Introduce pci_mmap_p2pmem()

2022-07-05 Thread Christoph Hellwig
On Tue, Jul 05, 2022 at 10:51:02AM -0300, Jason Gunthorpe wrote:
> > In fact I'm not even sure this should be a character device, it seems
> > to fit it way better with the PCI sysfs hierchacy, just like how we
> > map MMIO resources, which these are anyway.  And once it is on sysfs
> > we do have a uniqueue inode and need none of the pseudofs stuff, and
> > don't need all the glue code in nvme either.
> 
> Shouldn't there be an allocator here? It feels a bit weird that the
> entire CMB is given to a single process, it is a sharable resource,
> isn't it?

Making the entire area given by the device to the p2p allocator available
to user space seems sensible to me.  That is what the current series does,
and what a sysfs interface would do as well.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v3 1/8] irqchip/mips-gic: Only register IPI domain when SMP is enabled

2022-07-05 Thread Serge Semin
Hi Samuel

On Fri, Jul 01, 2022 at 03:00:49PM -0500, Samuel Holland wrote:
> The MIPS GIC irqchip driver may be selected in a uniprocessor
> configuration, but it unconditionally registers an IPI domain.
> 
> Limit the part of the driver dealing with IPIs to only be compiled when
> GENERIC_IRQ_IPI is enabled, which corresponds to an SMP configuration.

Thanks for the patch. Some comment is below.

> 
> Reported-by: kernel test robot 
> Signed-off-by: Samuel Holland 
> ---
> 
> Changes in v3:
>  - New patch to fix build errors in uniprocessor MIPS configs
> 
>  drivers/irqchip/Kconfig|  3 +-
>  drivers/irqchip/irq-mips-gic.c | 80 +++---
>  2 files changed, 56 insertions(+), 27 deletions(-)
> 
> diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
> index 1f23a6be7d88..d26a4ff7c99f 100644
> --- a/drivers/irqchip/Kconfig
> +++ b/drivers/irqchip/Kconfig
> @@ -322,7 +322,8 @@ config KEYSTONE_IRQ
>  
>  config MIPS_GIC
>   bool
> - select GENERIC_IRQ_IPI
> + select GENERIC_IRQ_IPI if SMP

> + select IRQ_DOMAIN_HIERARCHY

It seems to me that the IRQ domains hierarchy is supposed to be
created only if IPI is required. At least that's what the MIPS GIC
driver implies. Thus we can go further and CONFIG_IRQ_DOMAIN_HIERARCHY
ifdef-out the gic_irq_domain_alloc() and gic_irq_domain_free()
methods definition together with the initialization:

 static const struct irq_domain_ops gic_irq_domain_ops = {
.xlate = gic_irq_domain_xlate,
+#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
.alloc = gic_irq_domain_alloc,
.free = gic_irq_domain_free,
+#endif
.map = gic_irq_domain_map,
};

If the GENERIC_IRQ_IPI config is enabled, CONFIG_IRQ_DOMAIN_HIERARCHY
will be automatically selected (see the config definition in
kernel/irq/Kconfig). If the IRQs hierarchy is needed for some another
functionality like GENERIC_MSI_IRQ_DOMAIN or GPIOs then they will
explicitly enable the IRQ_DOMAIN_HIERARCHY config thus activating the
denoted .alloc and .free methods definitions.

To sum up you can get rid of the IRQ_DOMAIN_HIERARCHY config
force-select from this patch and make the MIPS GIC driver code a bit
more coherent.

@Marc, please correct me if were wrong.

-Serget

>   select MIPS_CM
>  
>  config INGENIC_IRQ
> diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c
> index ff89b36267dd..8a9efb6ae587 100644
> --- a/drivers/irqchip/irq-mips-gic.c
> +++ b/drivers/irqchip/irq-mips-gic.c
> @@ -52,13 +52,15 @@ static DEFINE_PER_CPU_READ_MOSTLY(unsigned 
> long[GIC_MAX_LONGS], pcpu_masks);
>  
>  static DEFINE_SPINLOCK(gic_lock);
>  static struct irq_domain *gic_irq_domain;
> -static struct irq_domain *gic_ipi_domain;
>  static int gic_shared_intrs;
>  static unsigned int gic_cpu_pin;
>  static unsigned int timer_cpu_pin;
>  static struct irq_chip gic_level_irq_controller, gic_edge_irq_controller;
> +
> +#ifdef CONFIG_GENERIC_IRQ_IPI
>  static DECLARE_BITMAP(ipi_resrv, GIC_MAX_INTRS);
>  static DECLARE_BITMAP(ipi_available, GIC_MAX_INTRS);
> +#endif /* CONFIG_GENERIC_IRQ_IPI */
>  
>  static struct gic_all_vpes_chip_data {
>   u32 map;
> @@ -472,9 +474,11 @@ static int gic_irq_domain_map(struct irq_domain *d, 
> unsigned int virq,
>   u32 map;
>  
>   if (hwirq >= GIC_SHARED_HWIRQ_BASE) {
> +#ifdef CONFIG_GENERIC_IRQ_IPI
>   /* verify that shared irqs don't conflict with an IPI irq */
>   if (test_bit(GIC_HWIRQ_TO_SHARED(hwirq), ipi_resrv))
>   return -EBUSY;
> +#endif /* CONFIG_GENERIC_IRQ_IPI */
>  
>   err = irq_domain_set_hwirq_and_chip(d, virq, hwirq,
>   _level_irq_controller,
> @@ -567,6 +571,8 @@ static const struct irq_domain_ops gic_irq_domain_ops = {
>   .map = gic_irq_domain_map,
>  };
>  
> +#ifdef CONFIG_GENERIC_IRQ_IPI
> +
>  static int gic_ipi_domain_xlate(struct irq_domain *d, struct device_node 
> *ctrlr,
>   const u32 *intspec, unsigned int intsize,
>   irq_hw_number_t *out_hwirq,
> @@ -670,6 +676,48 @@ static const struct irq_domain_ops gic_ipi_domain_ops = {
>   .match = gic_ipi_domain_match,
>  };
>  
> +static int gic_register_ipi_domain(struct device_node *node)
> +{
> + struct irq_domain *gic_ipi_domain;
> + unsigned int v[2], num_ipis;
> +
> + gic_ipi_domain = irq_domain_add_hierarchy(gic_irq_domain,
> +   IRQ_DOMAIN_FLAG_IPI_PER_CPU,
> +   GIC_NUM_LOCAL_INTRS + 
> gic_shared_intrs,
> +   node, _ipi_domain_ops, 
> NULL);
> + if (!gic_ipi_domain) {
> + pr_err("Failed to add IPI domain");
> + return -ENXIO;
> + }
> +
> + irq_domain_update_bus_token(gic_ipi_domain, DOMAIN_BUS_IPI);
> +
> + if (node &&
> + !of_property_read_u32_array(node, 

Re: [PATCH v7 20/21] PCI/P2PDMA: Introduce pci_mmap_p2pmem()

2022-07-05 Thread Jason Gunthorpe
On Tue, Jul 05, 2022 at 09:51:08AM +0200, Christoph Hellwig wrote:

> But what also really matters here:  I don't want every user that
> wants to be able to mmap a character device to do all this work.
> The layering is simply wrong, it needs some character device
> based helpers, not be open code everywhere.

I think alot (all?) cases would be happy if the inode was 1:1 with the
cdev struct device. I suppose the cdev code would still have to create
pseudo fs, but at least that is hidden.

> In fact I'm not even sure this should be a character device, it seems
> to fit it way better with the PCI sysfs hierchacy, just like how we
> map MMIO resources, which these are anyway.  And once it is on sysfs
> we do have a uniqueue inode and need none of the pseudofs stuff, and
> don't need all the glue code in nvme either.

Shouldn't there be an allocator here? It feels a bit weird that the
entire CMB is given to a single process, it is a sharable resource,
isn't it?

Jason
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v7] dt-bindings: reserved-memory: Document iommu-addresses

2022-07-05 Thread Thierry Reding
From: Thierry Reding 

This adds the "iommu-addresses" property to reserved-memory nodes, which
allow describing the interaction of memory regions with IOMMUs. Two use-
cases are supported:

  1. Static mappings can be described by pairing the "iommu-addresses"
 property with a "reg" property. This is mostly useful for adopting
 firmware-allocated buffers via identity mappings. One common use-
 case where this is required is if early firmware or bootloaders
 have set up a bootsplash framebuffer that a display controller is
 actively scanning out from during the operating system boot
 process.

  2. If an "iommu-addresses" property exists without a "reg" property,
 the reserved-memory node describes an IOVA reservation. Such memory
 regions are excluded from the IOVA space available to operating
 system drivers and can be used for regions that must not be used to
 map arbitrary buffers.

Each mapping or reservation is tied to a specific device via a phandle
to the device's device tree node. This allows a reserved-memory region
to be reused across multiple devices.

Signed-off-by: Thierry Reding 
---
Changes in v7:
- keep reserved-memory.txt to avoid broken references

Changes in v6:
- add device phandle to iommu-addresses property in examples
- remove now unused dt-bindings/reserved-memory.h header
---
 .../reserved-memory/reserved-memory.yaml  | 62 +++
 1 file changed, 62 insertions(+)

diff --git 
a/Documentation/devicetree/bindings/reserved-memory/reserved-memory.yaml 
b/Documentation/devicetree/bindings/reserved-memory/reserved-memory.yaml
index 7a0744052ff6..8b885ee82ff4 100644
--- a/Documentation/devicetree/bindings/reserved-memory/reserved-memory.yaml
+++ b/Documentation/devicetree/bindings/reserved-memory/reserved-memory.yaml
@@ -52,6 +52,30 @@ properties:
   Address and Length pairs. Specifies regions of memory that are
   acceptable to allocate from.
 
+  iommu-addresses:
+$ref: /schemas/types.yaml#/definitions/phandle-array
+description: >
+  A list of phandle and specifier pairs that describe static IO virtual
+  address space mappings and carveouts associated with a given reserved
+  memory region. The phandle in the first cell refers to the device for
+  which the mapping or carveout is to be created.
+
+  The specifier consists of an address/size pair and denotes the IO
+  virtual address range of the region for the given device. The exact
+  format depends on the values of the "#address-cells" and "#size-cells"
+  properties of the device referenced via the phandle.
+
+  When used in combination with a "reg" property, an IOVA mapping is to
+  be established for this memory region. One example where this can be
+  useful is to create an identity mapping for physical memory that the
+  firmware has configured some hardware to access (such as a bootsplash
+  framebuffer).
+
+  If no "reg" property is specified, the "iommu-addresses" property
+  defines carveout regions in the IOVA space for the given device. This
+  can be useful if a certain memory region should not be mapped through
+  the IOMMU.
+
   no-map:
 type: boolean
 description: >
@@ -97,4 +121,42 @@ oneOf:
 
 additionalProperties: true
 
+examples:
+  - |
+/ {
+  #address-cells = <2>;
+  #size-cells = <2>;
+
+  reserved-memory {
+ranges;
+
+adsp_resv: reservation-adsp {
+  /*
+   * Restrict IOVA mappings for ADSP buffers to the 512 MiB region
+   * from 0x4000 - 0x5fff. Anything outside is reserved by
+   * the ADSP for I/O memory and private memory allocations.
+   */
+  iommu-addresses = < 0x0 0x 0x00 0x4000>,
+< 0x0 0x6000 0xff 0xa000>;
+};
+
+fb: framebuffer@9000 {
+  reg = <0x0 0x9000 0x0 0x0080>;
+  iommu-addresses = < 0x0 0x9000 0x0 0x0080>;
+};
+  };
+
+  bus@0 {
+adsp: adsp@299 {
+  reg = <0x0 0x299 0x0 0x2000>;
+  memory-region = <_resv>;
+};
+
+dc0: display@1520 {
+  reg = <0x0 0x1520 0x0 0x1>;
+  memory-region = <>;
+};
+  };
+};
+
 ...
-- 
2.36.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v6 5/5] iommu/tegra-smmu: Support managed domains

2022-07-05 Thread Thierry Reding
From: Navneet Kumar 

Allow creating identity and DMA API compatible IOMMU domains. When
creating a DMA API compatible domain, make sure to also create the
required cookie.

Signed-off-by: Navneet Kumar 
Signed-off-by: Thierry Reding 
---
Changes in v5:
- remove DMA cookie initialization that's now no longer needed

 drivers/iommu/tegra-smmu.c | 37 -
 1 file changed, 20 insertions(+), 17 deletions(-)

diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index 93879c40056c..f8b2b863c111 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -16,6 +16,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -277,7 +278,9 @@ static struct iommu_domain 
*tegra_smmu_domain_alloc(unsigned type)
 {
struct tegra_smmu_as *as;
 
-   if (type != IOMMU_DOMAIN_UNMANAGED)
+   if (type != IOMMU_DOMAIN_UNMANAGED &&
+   type != IOMMU_DOMAIN_DMA &&
+   type != IOMMU_DOMAIN_IDENTITY)
return NULL;
 
as = kzalloc(sizeof(*as), GFP_KERNEL);
@@ -287,25 +290,16 @@ static struct iommu_domain 
*tegra_smmu_domain_alloc(unsigned type)
as->attr = SMMU_PD_READABLE | SMMU_PD_WRITABLE | SMMU_PD_NONSECURE;
 
as->pd = alloc_page(GFP_KERNEL | __GFP_DMA | __GFP_ZERO);
-   if (!as->pd) {
-   kfree(as);
-   return NULL;
-   }
+   if (!as->pd)
+   goto free_as;
 
as->count = kcalloc(SMMU_NUM_PDE, sizeof(u32), GFP_KERNEL);
-   if (!as->count) {
-   __free_page(as->pd);
-   kfree(as);
-   return NULL;
-   }
+   if (!as->count)
+   goto free_pd_range;
 
as->pts = kcalloc(SMMU_NUM_PDE, sizeof(*as->pts), GFP_KERNEL);
-   if (!as->pts) {
-   kfree(as->count);
-   __free_page(as->pd);
-   kfree(as);
-   return NULL;
-   }
+   if (!as->pts)
+   goto free_pts;
 
spin_lock_init(>lock);
 
@@ -315,6 +309,15 @@ static struct iommu_domain 
*tegra_smmu_domain_alloc(unsigned type)
as->domain.geometry.force_aperture = true;
 
return >domain;
+
+free_pts:
+   kfree(as->pts);
+free_pd_range:
+   __free_page(as->pd);
+free_as:
+   kfree(as);
+
+   return NULL;
 }
 
 static void tegra_smmu_domain_free(struct iommu_domain *domain)
@@ -1009,7 +1012,7 @@ static const struct iommu_ops tegra_smmu_ops = {
.probe_device = tegra_smmu_probe_device,
.release_device = tegra_smmu_release_device,
.device_group = tegra_smmu_device_group,
-   .get_resv_regions = of_iommu_get_resv_regions,
+   .get_resv_regions = iommu_dma_get_resv_regions,
.put_resv_regions = generic_iommu_put_resv_regions,
.of_xlate = tegra_smmu_of_xlate,
.pgsize_bitmap = SZ_4K,
-- 
2.36.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v6 4/5] iommu/tegra-smmu: Add support for reserved regions

2022-07-05 Thread Thierry Reding
From: Thierry Reding 

The Tegra DRM driver currently uses the IOMMU API explicitly. This means
that it has fine-grained control over when exactly the translation
through the IOMMU is enabled. This currently happens after the driver
probes, so the driver is in a DMA quiesced state when the IOMMU
translation is enabled.

During the transition of the Tegra DRM driver to use the DMA API instead
of the IOMMU API explicitly, it was observed that on certain platforms
the display controllers were still actively fetching from memory. When a
DMA IOMMU domain is created as part of the DMA/IOMMU API setup during
boot, the IOMMU translation for the display controllers can be enabled a
significant amount of time before the driver has had a chance to reset
the hardware into a sane state. This causes the SMMU to detect faults on
the addresses that the display controller is trying to fetch.

To avoid this, and as a byproduct paving the way for seamless transition
of display from the bootloader to the kernel, add support for reserved
regions in the Tegra SMMU driver. This is implemented using the standard
reserved memory device tree bindings, which let us describe regions of
memory which the kernel is forbidden from using for regular allocations.
The Tegra SMMU driver will parse the nodes associated with each device
via the "memory-region" property and return reserved regions that the
IOMMU core will then create direct mappings for prior to attaching the
IOMMU domains to the devices. This ensures that a 1:1 mapping is in
place when IOMMU translation starts and prevents the SMMU from detecting
any faults.

Signed-off-by: Thierry Reding 
---
 drivers/iommu/tegra-smmu.c | 47 --
 1 file changed, 45 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index 2f2b12033618..93879c40056c 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -10,6 +10,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -471,6 +472,7 @@ static void tegra_smmu_as_unprepare(struct tegra_smmu *smmu,
tegra_smmu_free_asid(smmu, as->id);
 
dma_unmap_page(smmu->dev, as->pd_dma, SMMU_SIZE_PD, DMA_TO_DEVICE);
+   as->pd_dma = 0;
 
as->smmu = NULL;
 
@@ -534,6 +536,38 @@ static void tegra_smmu_set_pde(struct tegra_smmu_as *as, 
unsigned long iova,
struct tegra_smmu *smmu = as->smmu;
u32 *pd = page_address(as->pd);
unsigned long offset = pd_index * sizeof(*pd);
+   bool unmap = false;
+
+   /*
+* XXX Move this outside of this function. Perhaps add a struct
+* iommu_domain parameter to ->{get,put}_resv_regions() so that
+* the mapping can be done there.
+*
+* The problem here is that as->smmu is only known once we attach
+* the domain to a device (because then we look up the right SMMU
+* instance via the dev->archdata.iommu pointer). When the direct
+* mappings are created for reserved regions, the domain has not
+* been attached to a device yet, so we don't know. We currently
+* fix that up in ->apply_resv_regions() because that is the first
+* time where we have access to a struct device that will be used
+* with the IOMMU domain. However, that's asymmetric and doesn't
+* take care of the page directory mapping either, so we need to
+* come up with something better.
+*/
+   if (WARN_ON_ONCE(as->pd_dma == 0)) {
+   as->pd_dma = dma_map_page(smmu->dev, as->pd, 0, SMMU_SIZE_PD,
+ DMA_TO_DEVICE);
+   if (dma_mapping_error(smmu->dev, as->pd_dma))
+   return;
+
+   if (!smmu_dma_addr_valid(smmu, as->pd_dma)) {
+   dma_unmap_page(smmu->dev, as->pd_dma, SMMU_SIZE_PD,
+  DMA_TO_DEVICE);
+   return;
+   }
+
+   unmap = true;
+   }
 
/* Set the page directory entry first */
pd[pd_index] = value;
@@ -546,6 +580,12 @@ static void tegra_smmu_set_pde(struct tegra_smmu_as *as, 
unsigned long iova,
smmu_flush_ptc(smmu, as->pd_dma, offset);
smmu_flush_tlb_section(smmu, as->id, iova);
smmu_flush(smmu);
+
+   if (unmap) {
+   dma_unmap_page(smmu->dev, as->pd_dma, SMMU_SIZE_PD,
+  DMA_TO_DEVICE);
+   as->pd_dma = 0;
+   }
 }
 
 static u32 *tegra_smmu_pte_offset(struct page *pt_page, unsigned long iova)
@@ -846,7 +886,6 @@ static struct iommu_device *tegra_smmu_probe_device(struct 
device *dev)
smmu = tegra_smmu_find(args.np);
if (smmu) {
err = tegra_smmu_configure(smmu, dev, );
-
if (err < 0) {
of_node_put(args.np);
return 

[PATCH v6 2/5] iommu: Implement of_iommu_get_resv_regions()

2022-07-05 Thread Thierry Reding
From: Thierry Reding 

This is an implementation that IOMMU drivers can use to obtain reserved
memory regions from a device tree node. It uses the reserved-memory DT
bindings to find the regions associated with a given device. If these
regions are marked accordingly, identity mappings will be created for
them in the IOMMU domain that the devices will be attached to.

Cc: Frank Rowand 
Cc: devicet...@vger.kernel.org
Reviewed-by: Rob Herring 
Signed-off-by: Thierry Reding 
---
Changes in v6:
- remove reference to now unused dt-bindings/reserved-memory.h include

Changes in v5:
- update for new "iommu-addresses" device tree bindings

Changes in v4:
- fix build failure on !CONFIG_OF_ADDRESS

Changes in v3:
- change "active" property to identity mapping flag that is part of the
  memory region specifier (as defined by #memory-region-cells) to allow
  per-reference flags to be used

Changes in v2:
- use "active" property to determine whether direct mappings are needed

 drivers/iommu/of_iommu.c | 88 
 include/linux/of_iommu.h |  8 
 2 files changed, 96 insertions(+)

diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
index 41f4eb005219..c62da41516eb 100644
--- a/drivers/iommu/of_iommu.c
+++ b/drivers/iommu/of_iommu.c
@@ -11,6 +11,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -172,3 +173,90 @@ const struct iommu_ops *of_iommu_configure(struct device 
*dev,
 
return ops;
 }
+
+/**
+ * of_iommu_get_resv_regions - reserved region driver helper for device tree
+ * @dev: device for which to get reserved regions
+ * @list: reserved region list
+ *
+ * IOMMU drivers can use this to implement their .get_resv_regions() callback
+ * for memory regions attached to a device tree node. See the reserved-memory
+ * device tree bindings on how to use these:
+ *
+ *   Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
+ */
+void of_iommu_get_resv_regions(struct device *dev, struct list_head *list)
+{
+#if IS_ENABLED(CONFIG_OF_ADDRESS)
+   struct of_phandle_iterator it;
+   int err;
+
+   of_for_each_phandle(, err, dev->of_node, "memory-region", NULL, 0) {
+   struct iommu_resv_region *region;
+   struct resource res;
+   const __be32 *maps;
+   int size;
+
+   memset(, 0, sizeof(res));
+
+   /*
+* The "reg" property is optional and can be omitted by 
reserved-memory regions
+* that represent reservations in the IOVA space, which are 
regions that should
+* not be mapped.
+*/
+   if (of_find_property(it.node, "reg", NULL)) {
+   err = of_address_to_resource(it.node, 0, );
+   if (err < 0) {
+   dev_err(dev, "failed to parse memory region 
%pOF: %d\n",
+   it.node, err);
+   continue;
+   }
+   }
+
+   maps = of_get_property(it.node, "iommu-addresses", );
+   if (maps) {
+   const __be32 *end = maps + size / sizeof(__be32);
+   struct device_node *np;
+   unsigned int index = 0;
+   u32 phandle;
+   int na, ns;
+
+   while (maps < end) {
+   phys_addr_t start, end;
+   size_t length;
+
+   phandle = be32_to_cpup(maps++);
+   np = of_find_node_by_phandle(phandle);
+   na = of_n_addr_cells(np);
+   ns = of_n_size_cells(np);
+
+   start = of_translate_dma_address(np, maps);
+   length = of_read_number(maps + na, ns);
+   end = start + length - 1;
+
+   if (np == dev->of_node) {
+   int prot = IOMMU_READ | IOMMU_WRITE;
+   enum iommu_resv_type type;
+
+   /*
+* IOMMU regions without an associated 
physical region
+* cannot be mapped and are simply 
reservations.
+*/
+   if (res.end > res.start)
+   type = 
IOMMU_RESV_DIRECT_RELAXABLE;
+   else
+   type = IOMMU_RESV_RESERVED;
+
+   region = iommu_alloc_resv_region(start, 
length, prot, type);
+   if (region)
+   list_add_tail(>list, 

[PATCH v6 3/5] iommu: dma: Use of_iommu_get_resv_regions()

2022-07-05 Thread Thierry Reding
From: Thierry Reding 

For device tree nodes, use the standard of_iommu_get_resv_regions()
implementation to obtain the reserved memory regions associated with a
device.

Cc: Rob Herring 
Cc: Frank Rowand 
Cc: devicet...@vger.kernel.org
Signed-off-by: Thierry Reding 
---
 drivers/iommu/dma-iommu.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 1910f4f1612b..84fad59bc789 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -23,6 +23,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -389,6 +390,8 @@ void iommu_dma_get_resv_regions(struct device *dev, struct 
list_head *list)
if (!is_of_node(dev_iommu_fwspec_get(dev)->iommu_fwnode))
iort_iommu_msi_get_resv_regions(dev, list);
 
+   if (dev->of_node)
+   of_iommu_get_resv_regions(dev, list);
 }
 EXPORT_SYMBOL(iommu_dma_get_resv_regions);
 
-- 
2.36.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v6 1/5] dt-bindings: reserved-memory: Document iommu-addresses

2022-07-05 Thread Thierry Reding
From: Thierry Reding 

This adds the "iommu-addresses" property to reserved-memory nodes, which
allow describing the interaction of memory regions with IOMMUs. Two use-
cases are supported:

  1. Static mappings can be described by pairing the "iommu-addresses"
 property with a "reg" property. This is mostly useful for adopting
 firmware-allocated buffers via identity mappings. One common use-
 case where this is required is if early firmware or bootloaders
 have set up a bootsplash framebuffer that a display controller is
 actively scanning out from during the operating system boot
 process.

  2. If an "iommu-addresses" property exists without a "reg" property,
 the reserved-memory node describes an IOVA reservation. Such memory
 regions are excluded from the IOVA space available to operating
 system drivers and can be used for regions that must not be used to
 map arbitrary buffers.

Each mapping or reservation is tied to a specific device via a phandle
to the device's device tree node. This allows a reserved-memory region
to be reused across multiple devices.

Signed-off-by: Thierry Reding 
---
Changes in v6:
- add device phandle to iommu-addresses property in examples
- remove now unused dt-bindings/reserved-memory.h header

 .../reserved-memory/reserved-memory.txt   |  1 -
 .../reserved-memory/reserved-memory.yaml  | 62 +++
 2 files changed, 62 insertions(+), 1 deletion(-)
 delete mode 100644 
Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt

diff --git 
a/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt 
b/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
deleted file mode 100644
index 1810701a8509..
--- a/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
+++ /dev/null
@@ -1 +0,0 @@
-This file has been moved to reserved-memory.yaml.
diff --git 
a/Documentation/devicetree/bindings/reserved-memory/reserved-memory.yaml 
b/Documentation/devicetree/bindings/reserved-memory/reserved-memory.yaml
index 7a0744052ff6..8b885ee82ff4 100644
--- a/Documentation/devicetree/bindings/reserved-memory/reserved-memory.yaml
+++ b/Documentation/devicetree/bindings/reserved-memory/reserved-memory.yaml
@@ -52,6 +52,30 @@ properties:
   Address and Length pairs. Specifies regions of memory that are
   acceptable to allocate from.
 
+  iommu-addresses:
+$ref: /schemas/types.yaml#/definitions/phandle-array
+description: >
+  A list of phandle and specifier pairs that describe static IO virtual
+  address space mappings and carveouts associated with a given reserved
+  memory region. The phandle in the first cell refers to the device for
+  which the mapping or carveout is to be created.
+
+  The specifier consists of an address/size pair and denotes the IO
+  virtual address range of the region for the given device. The exact
+  format depends on the values of the "#address-cells" and "#size-cells"
+  properties of the device referenced via the phandle.
+
+  When used in combination with a "reg" property, an IOVA mapping is to
+  be established for this memory region. One example where this can be
+  useful is to create an identity mapping for physical memory that the
+  firmware has configured some hardware to access (such as a bootsplash
+  framebuffer).
+
+  If no "reg" property is specified, the "iommu-addresses" property
+  defines carveout regions in the IOVA space for the given device. This
+  can be useful if a certain memory region should not be mapped through
+  the IOMMU.
+
   no-map:
 type: boolean
 description: >
@@ -97,4 +121,42 @@ oneOf:
 
 additionalProperties: true
 
+examples:
+  - |
+/ {
+  #address-cells = <2>;
+  #size-cells = <2>;
+
+  reserved-memory {
+ranges;
+
+adsp_resv: reservation-adsp {
+  /*
+   * Restrict IOVA mappings for ADSP buffers to the 512 MiB region
+   * from 0x4000 - 0x5fff. Anything outside is reserved by
+   * the ADSP for I/O memory and private memory allocations.
+   */
+  iommu-addresses = < 0x0 0x 0x00 0x4000>,
+< 0x0 0x6000 0xff 0xa000>;
+};
+
+fb: framebuffer@9000 {
+  reg = <0x0 0x9000 0x0 0x0080>;
+  iommu-addresses = < 0x0 0x9000 0x0 0x0080>;
+};
+  };
+
+  bus@0 {
+adsp: adsp@299 {
+  reg = <0x0 0x299 0x0 0x2000>;
+  memory-region = <_resv>;
+};
+
+dc0: display@1520 {
+  reg = <0x0 0x1520 0x0 0x1>;
+  memory-region = <>;
+};
+  };
+};
+
 ...
-- 
2.36.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v6 0/5] iommu: Support mappings/reservations in reserved-memory regions

2022-07-05 Thread Thierry Reding
From: Thierry Reding 

Hi,

This version has several fixes over the previous v5, which can be found
here:

  https://lore.kernel.org/all/20220512190052.1152377-1-thierry.red...@gmail.com/

An example is included in the DT bindings, but here is an extract of
what I've used to test this:

reserved-memory {
#address-cells = <2>;
#size-cells = <2>;
ranges;

/*
 * Creates an identity mapping for the framebuffer that
 * the firmware has setup to scan out a bootsplash from.
 */
fb: framebuffer@92cb2000 {
reg = <0x0 0x92cb2000 0x0 0x0080>;
iommu-addresses = < 0x0 0x92cb2000 0x0 0x0080>;
};

/*
 * Creates a reservation in the IOVA space to prevent
 * any buffers from being mapped to that region. Note
 * that on Tegra the range is actually quite different
 * from this, but it would conflict with the display
 * driver that I tested this against, so this is just
 * a dummy region for testing.
 */
adsp: reservation-adsp {
iommu-addresses = < 0x0 0x9000 0x0 0x0001>;
};
};

host1x@5000 {
dc@5420 {
memory-region = <>, <>;
};
};

This is abbreviated a little to focus on the essentials. Note also that
the ADSP reservation is not actually used on this device and the driver
for this doesn't exist yet, but I wanted to include this variant for
testing, because we'll want to use these bindings for the reservation
use-case as well at some point.

Adding Alyssa and Janne who have in the past tried to make these
bindings work on Apple M1. Also adding Sameer from the Tegra audio team
to look at the ADSP reservation and double-check that this is suitable
for our needs.

Thierry

Navneet Kumar (1):
  iommu/tegra-smmu: Support managed domains

Thierry Reding (4):
  dt-bindings: reserved-memory: Document iommu-addresses
  iommu: Implement of_iommu_get_resv_regions()
  iommu: dma: Use of_iommu_get_resv_regions()
  iommu/tegra-smmu: Add support for reserved regions

 .../reserved-memory/reserved-memory.txt   |  1 -
 .../reserved-memory/reserved-memory.yaml  | 62 +
 drivers/iommu/dma-iommu.c |  3 +
 drivers/iommu/of_iommu.c  | 88 +++
 drivers/iommu/tegra-smmu.c| 82 +
 include/linux/of_iommu.h  |  8 ++
 6 files changed, 225 insertions(+), 19 deletions(-)
 delete mode 100644 
Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt

-- 
2.36.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH kernel] powerpc/iommu: Add simple iommu_ops to report capabilities

2022-07-05 Thread Jason Gunthorpe via iommu
On Tue, Jul 05, 2022 at 04:22:35PM +1000, Alexey Kardashevskiy wrote:

> I have not looked into the domains for ages, what is missing here? With this
> on top of 5.19-rc1 VFIO works again on my POWER9 box. Thanks,

Does this solve all the problems or just coherency? It seems like it
should solve everything now as there will be a IOMMU_DOMAIN_BLOCKED
and the ref logic will succeed to assign it?

> +static struct iommu_domain *spapr_tce_iommu_domain_alloc(unsigned int type)
> +{
> + struct iommu_domain *domain = kzalloc(sizeof(*domain), GFP_KERNEL);
> +
> + if (!domain)
> + return NULL;

This should only succeed if type is IOMMU_DOMAIN_BLOCKED

> +static struct iommu_group *spapr_tce_iommu_device_group(struct device *dev)
> +{
> + struct iommu_group *grp = dev->iommu_group;
> +
> + if (!grp)
> + grp = ERR_PTR(-ENODEV);

It looks like this should just always fail since the code code already
checks iommu_group before calling this? (Arguably ppc should be
refactored to use the normal probe_device and device_group ops to
create groups, but that doesn't seem critical for this.

Thanks
Jason
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH kernel] powerpc/iommu: Add simple iommu_ops to report capabilities

2022-07-05 Thread Robin Murphy

On 2022-07-05 07:22, Alexey Kardashevskiy wrote:

Historically PPC64 managed to avoid using iommu_ops. The VFIO driver
uses a SPAPR TCE sub-driver and all iommu_ops uses were kept in
the Type1 VFIO driver. Recent development though has added a coherency
capability check to the generic part of VFIO and essentially disabled
VFIO on PPC64.

This adds a simple iommu_ops stub which reports support for cache
coherency. Because bus_set_iommu() triggers IOMMU probing of PCI devices,
this provides minimum code for the probing to not crash.


No more bus_set_iommu() please - I'll be sending a new version of this 
series very soon:


https://lore.kernel.org/linux-iommu/cover.1650890638.git.robin.mur...@arm.com/

Cheers,
Robin.


The previous discussion is here:
https://patchwork.ozlabs.org/project/kvm-ppc/patch/20220701061751.1955857-1-...@ozlabs.ru/

Fixes: e8ae0e140c05 ("vfio: Require that devices support DMA cache coherence")
Fixes: 70693f470848 ("vfio: Set DMA ownership for VFIO devices")
Signed-off-by: Alexey Kardashevskiy 
---

I have not looked into the domains for ages, what is missing here? With this
on top of 5.19-rc1 VFIO works again on my POWER9 box. Thanks,

---
  arch/powerpc/include/asm/iommu.h |  2 +
  arch/powerpc/kernel/iommu.c  | 70 
  arch/powerpc/kernel/pci_64.c |  3 ++
  3 files changed, 75 insertions(+)

diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index 7e29c73e3dd4..4bdae0ee29d0 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -215,6 +215,8 @@ extern long iommu_tce_xchg_no_kill(struct mm_struct *mm,
enum dma_data_direction *direction);
  extern void iommu_tce_kill(struct iommu_table *tbl,
unsigned long entry, unsigned long pages);
+
+extern const struct iommu_ops spapr_tce_iommu_ops;
  #else
  static inline void iommu_register_group(struct iommu_table_group *table_group,
int pci_domain_number,
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 7e56ddb3e0b9..2205b448f7d5 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -1176,4 +1176,74 @@ void iommu_del_device(struct device *dev)
iommu_group_remove_device(dev);
  }
  EXPORT_SYMBOL_GPL(iommu_del_device);
+
+/*
+ * A simple iommu_ops to allow less cruft in generic VFIO code.
+ */
+static bool spapr_tce_iommu_capable(enum iommu_cap cap)
+{
+   switch (cap) {
+   case IOMMU_CAP_CACHE_COHERENCY:
+   return true;
+   default:
+   break;
+   }
+
+   return false;
+}
+
+static struct iommu_domain *spapr_tce_iommu_domain_alloc(unsigned int type)
+{
+   struct iommu_domain *domain = kzalloc(sizeof(*domain), GFP_KERNEL);
+
+   if (!domain)
+   return NULL;
+
+   domain->geometry.aperture_start = 0;
+   domain->geometry.aperture_end = ~0ULL;
+   domain->geometry.force_aperture = true;
+
+   return domain;
+}
+
+static struct iommu_device *spapr_tce_iommu_probe_device(struct device *dev)
+{
+   struct iommu_device *iommu_dev = kzalloc(sizeof(struct iommu_device), 
GFP_KERNEL);
+
+   iommu_dev->dev = dev;
+   iommu_dev->ops = _tce_iommu_ops;
+
+   return iommu_dev;
+}
+
+static void spapr_tce_iommu_release_device(struct device *dev)
+{
+}
+
+static int spapr_tce_iommu_attach_dev(struct iommu_domain *dom,
+ struct device *dev)
+{
+   return 0;
+}
+
+static struct iommu_group *spapr_tce_iommu_device_group(struct device *dev)
+{
+   struct iommu_group *grp = dev->iommu_group;
+
+   if (!grp)
+   grp = ERR_PTR(-ENODEV);
+   return grp;
+}
+
+const struct iommu_ops spapr_tce_iommu_ops = {
+   .capable = spapr_tce_iommu_capable,
+   .domain_alloc = spapr_tce_iommu_domain_alloc,
+   .probe_device = spapr_tce_iommu_probe_device,
+   .release_device = spapr_tce_iommu_release_device,
+   .device_group = spapr_tce_iommu_device_group,
+   .default_domain_ops = &(const struct iommu_domain_ops) {
+   .attach_dev = spapr_tce_iommu_attach_dev,
+   }
+};
+
  #endif /* CONFIG_IOMMU_API */
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index 19b03ddf5631..04bc0c52e45c 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -20,6 +20,7 @@
  #include 
  #include 
  #include 
+#include 
  
  #include 

  #include 
@@ -27,6 +28,7 @@
  #include 
  #include 
  #include 
+#include 
  
  /* pci_io_base -- the base address from which io bars are offsets.

   * This is the lowest I/O base address (so bar values are always positive),
@@ -69,6 +71,7 @@ static int __init pcibios_init(void)
ppc_md.pcibios_fixup();
  
  	printk(KERN_DEBUG "PCI: Probing PCI hardware done\n");

+   bus_set_iommu(_bus_type, _tce_iommu_ops);
  
  	return 0;

  }


Re: [PATCH v2 3/9] net: mdio: Delete usage of driver_deferred_probe_check_state()

2022-07-05 Thread Geert Uytterhoeven
Hi Saravana,

On Wed, Jun 1, 2022 at 2:44 PM Saravana Kannan  wrote:
> Now that fw_devlink=on by default and fw_devlink supports interrupt
> properties, the execution will never get to the point where
> driver_deferred_probe_check_state() is called before the supplier has
> probed successfully or before deferred probe timeout has expired.
>
> So, delete the call and replace it with -ENODEV.
>
> Signed-off-by: Saravana Kannan 

Thanks for your patch, which is now commit f8217275b57aa48d ("net:
mdio: Delete usage of driver_deferred_probe_check_state()") in
driver-core/driver-core-next.

Seems like I missed something when providing my T-b for this series,
sorry for that.

arch/arm/boot/dts/r8a7791-koelsch.dts has:

 {
pinctrl-0 = <_pins>, <_pins>;
pinctrl-names = "default";

phy-handle = <>;
renesas,ether-link-active-low;
status = "okay";

phy1: ethernet-phy@1 {
compatible = "ethernet-phy-id0022.1537",
 "ethernet-phy-ieee802.3-c22";
reg = <1>;
interrupt-parent = <>;
interrupts = <0 IRQ_TYPE_LEVEL_LOW>;
micrel,led-mode = <1>;
reset-gpios = < 22 GPIO_ACTIVE_LOW>;
};
};

Despite the interrupts property,  is now probed before irqc0
(interrupt-controller@e61c in arch/arm/boot/dts/r8a7791.dtsi),
causing the PHY not finding its interrupt, and resorting to polling:

-Micrel KSZ8041RNLI ee70.ethernet-:01: attached PHY
driver (mii_bus:phy_addr=ee70.ethernet-:01, irq=185)
+Micrel KSZ8041RNLI ee70.ethernet-:01: attached PHY
driver (mii_bus:phy_addr=ee70.ethernet-:01, irq=POLL)

Reverting this commit, and commit 9cbffc7a59561be9 ("driver core:
Delete driver_deferred_probe_check_state()") fixes that.

> --- a/drivers/net/mdio/fwnode_mdio.c
> +++ b/drivers/net/mdio/fwnode_mdio.c
> @@ -47,9 +47,7 @@ int fwnode_mdiobus_phy_device_register(struct mii_bus *mdio,
>  * just fall back to poll mode
>  */
> if (rc == -EPROBE_DEFER)
> -   rc = driver_deferred_probe_check_state(>mdio.dev);
> -   if (rc == -EPROBE_DEFER)
> -   return rc;
> +   rc = -ENODEV;
>
> if (rc > 0) {
> phy->irq = rc;

Gr{oetje,eeting}s,

Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 03/14] iommu: Move bus setup to IOMMU device registration

2022-07-05 Thread Robin Murphy

On 2022-07-05 05:51, Baolu Lu wrote:

Hi Robin,

On 2022/4/30 02:06, Robin Murphy wrote:

On 29/04/2022 9:50 am, Robin Murphy wrote:

On 2022-04-29 07:57, Baolu Lu wrote:

Hi Robin,

On 2022/4/28 21:18, Robin Murphy wrote:

Move the bus setup to iommu_device_register(). This should allow
bus_iommu_probe() to be correctly replayed for multiple IOMMU 
instances,

and leaves bus_set_iommu() as a glorified no-op to be cleaned up next.


I re-fetched the latest patches on

https://gitlab.arm.com/linux-arm/linux-rm/-/commits/iommu/bus

and rolled back the head to "iommu: Cleanup bus_set_iommu".

The test machine still hangs during boot.

I went through the code. It seems that the .probe_device for Intel 
IOMMU

driver can't handle the probe replay well. It always assumes that the
device has never been probed.


Hmm, but probe_iommu_group() is supposed to prevent the 
__iommu_probe_device() call even happening if the device *has* 
already been probed before :/


I've still got an old Intel box spare in the office so I'll rig that 
up and see if I can see what might be going on here...


OK, on a Xeon with two DMAR units, this seems to boot OK with or 
without patch #1, so it doesn't seem to be a general problem with 
replaying in iommu_device_register(), or with platform devices. Not 
sure where to go from here... :/


The kernel boot panic message:

[    6.639432] BUG: kernel NULL pointer dereference, address: 
0028

[    6.743829] #PF: supervisor read access in kernel mode
[    6.743829] #PF: error_code(0x) - not-present page
[    6.743829] PGD 0
[    6.743829] Oops:  [#1] PREEMPT SMP NOPTI
[    6.743829] CPU: 0 PID: 1 Comm: swapper/0 Tainted: G  I 
  5.19.0-rc3+ #182

[    6.743829] RIP: 0010:__iommu_probe_device+0x115/0x200
[    6.743829] Code: 89 ff e8 3e e1 ff ff 48 85 c0 0f 85 47 ff ff ff 41 
bd f4 ff ff ff eb 83 48 8b 83 d8 02 00 00 48 89 df 48 8b 40 38 48 8b 40 
10 <48> 8b 40 28 ff d0 0f 1f 00 48 89 c1 48 85 c0 0f 84 b7 00 00 00 48

[    6.743829] RSP: :ff30605c00063d40 EFLAGS: 00010246
[    6.743829] RAX:  RBX: ff128b9c5fdc90d0 RCX: 

[    6.743829] RDX: 8001 RSI: 0246 RDI: 
ff128b9c5fdc90d0
[    6.743829] RBP: b60c34e0 R08: b68664d0 R09: 
ff128b9501d4ce40
[    6.743829] R10: b6267096 R11: ff128b950014c267 R12: 
ff30605c00063de0
[    6.743829] R13: 001b9d28 R14: ff128b95001b9d28 R15: 
ff128b9c5fdc93a8
[    6.743829] FS:  () GS:ff128b9c5fc0() 
knlGS:

[    6.743829] CS:  0010 DS:  ES:  CR0: 80050033
[    6.743829] CR2: 0028 CR3: 000149210001 CR4: 
00771ef0
[    6.743829] DR0:  DR1:  DR2: 

[    6.743829] DR3:  DR6: 07f0 DR7: 
0400

[    6.743829] PKRU: 5554
[    6.743829] Call Trace:
[    6.743829]  
[    6.743829]  ? _raw_spin_lock_irqsave+0x17/0x40
[    6.743829]  ? __iommu_probe_device+0x200/0x200
[    6.743829]  probe_iommu_group+0x2d/0x40
[    6.743829]  bus_for_each_dev+0x74/0xc0
[    6.743829]  bus_iommu_probe+0x48/0x2d0
[    6.743829]  iommu_device_register+0xde/0x120
[    6.743829]  intel_iommu_init+0x35f/0x5f2
[    6.743829]  ? iommu_setup+0x27d/0x27d
[    6.743829]  ? rdinit_setup+0x2c/0x2c
[    6.743829]  pci_iommu_init+0xe/0x32
[    6.743829]  do_one_initcall+0x41/0x200
[    6.743829]  kernel_init_freeable+0x1de/0x228
[    6.743829]  ? rest_init+0xc0/0xc0
[    6.743829]  kernel_init+0x16/0x120
[    6.743829]  ret_from_fork+0x1f/0x30
[    6.743829]  
[    6.743829] Modules linked in:
[    6.743829] CR2: 0028
[    6.743829] ---[ end trace  ]---
[    6.743829] RIP: 0010:__iommu_probe_device+0x115/0x200
[    6.743829] Code: 89 ff e8 3e e1 ff ff 48 85 c0 0f 85 47 ff ff ff 41 
bd f4 ff ff ff eb 83 48 8b 83 d8 02 00 00 48 89 df 48 8b 40 38 48 8b 40 
10 <48> 8b 40 28 ff d0 0f 1f 00 48 89 c1 48 85 c0 0f 84 b7 00 00 00 48

[    6.743829] RSP: :ff30605c00063d40 EFLAGS: 00010246
[    6.743829] RAX:  RBX: ff128b9c5fdc90d0 RCX: 

[    6.743829] RDX: 8001 RSI: 0246 RDI: 
ff128b9c5fdc90d0
[    6.743829] RBP: b60c34e0 R08: b68664d0 R09: 
ff128b9501d4ce40
[    6.743829] R10: b6267096 R11: ff128b950014c267 R12: 
ff30605c00063de0
[    6.743829] R13: 001b9d28 R14: ff128b95001b9d28 R15: 
ff128b9c5fdc93a8
[    6.743829] FS:  () GS:ff128b9c5fc0() 
knlGS:

[    6.743829] CS:  0010 DS:  ES:  CR0: 80050033
[    6.743829] CR2: 0028 CR3: 000149210001 CR4: 
00771ef0
[    6.743829] DR0:  DR1:  DR2: 

[    6.743829] DR3:  DR6: 07f0 DR7: 
0400

[    6.743829] PKRU: 5554
[    6.743829] Kernel panic - not syncing: Fatal exception
[    

Re: [PATCH v2 1/9] PM: domains: Delete usage of driver_deferred_probe_check_state()

2022-07-05 Thread Saravana Kannan via iommu
On Fri, Jul 1, 2022 at 12:13 PM Saravana Kannan  wrote:
>
> On Fri, Jul 1, 2022 at 8:08 AM Sudeep Holla  wrote:
> >
> > Hi, Saravana,
> >
> > On Fri, Jul 01, 2022 at 01:26:12AM -0700, Saravana Kannan wrote:
> >
> > [...]
> >
> > > Can you check if this hack helps? If so, then I can think about
> > > whether we can pick it up without breaking everything else. Copy-paste
> > > tab mess up warning.
> >
> > Sorry for jumping in late and not even sure if this is right thread.
> > I have not bisected anything yet, but I am seeing issues on my Juno R2
> > with SCMI enabled power domains and Coresight AMBA devices.
> >
> > OF: amba_device_add() failed (-19) for /etf@2001
> > OF: amba_device_add() failed (-19) for /tpiu@2003
> > OF: amba_device_add() failed (-19) for /funnel@2004
> > OF: amba_device_add() failed (-19) for /etr@2007
> > OF: amba_device_add() failed (-19) for /stm@2010
> > OF: amba_device_add() failed (-19) for /replicator@2012
> > OF: amba_device_add() failed (-19) for /cpu-debug@2201
> > OF: amba_device_add() failed (-19) for /etm@2204
> > OF: amba_device_add() failed (-19) for /cti@2202
> > OF: amba_device_add() failed (-19) for /funnel@220c
> > OF: amba_device_add() failed (-19) for /cpu-debug@2211
> > OF: amba_device_add() failed (-19) for /etm@2214
> > OF: amba_device_add() failed (-19) for /cti@2212
> > OF: amba_device_add() failed (-19) for /cpu-debug@2301
> > OF: amba_device_add() failed (-19) for /etm@2304
> > OF: amba_device_add() failed (-19) for /cti@2302
> > OF: amba_device_add() failed (-19) for /funnel@230c
> > OF: amba_device_add() failed (-19) for /cpu-debug@2311
> > OF: amba_device_add() failed (-19) for /etm@2314
> > OF: amba_device_add() failed (-19) for /cti@2312
> > OF: amba_device_add() failed (-19) for /cpu-debug@2321
> > OF: amba_device_add() failed (-19) for /etm@2324
> > OF: amba_device_add() failed (-19) for /cti@2322
> > OF: amba_device_add() failed (-19) for /cpu-debug@2331
> > OF: amba_device_add() failed (-19) for /etm@2334
> > OF: amba_device_add() failed (-19) for /cti@2332
> > OF: amba_device_add() failed (-19) for /cti@2002
> > OF: amba_device_add() failed (-19) for /cti@2011
> > OF: amba_device_add() failed (-19) for /funnel@2013
> > OF: amba_device_add() failed (-19) for /etf@2014
> > OF: amba_device_add() failed (-19) for /funnel@2015
> > OF: amba_device_add() failed (-19) for /cti@2016
> >
> > These are working fine with deferred probe in the mainline.
> > I tried the hack you have suggested here(rather Tony's version),
>
> Thanks for trying that.
>
> > also
> > tried with fw_devlink=0 and fw_devlink=1
>
> 0 and 1 aren't valid input to fw_devlink. But yeah, I don't expect
> disabling it to make anything better.
>
> > && fw_devlink.strict=0
> > No change in the behaviour.
> >
> > The DTS are in arch/arm64/boot/dts/arm/juno-*-scmi.dts and there
> > coresight devices are mostly in juno-cs-r1r2.dtsi
>
> Thanks
>
> > Let me know if there is anything obvious or you want me to bisect which
> > means I need more time. I can do that next week.
>
> I'll let you know once I poke at the DTS. We need to figure out why
> fw_devlink wasn't blocking these from getting to the error (same as in
> Tony's case). But since these are amba devices, I think I have some
> guesses.
>
> This is an old series that had some issues in some cases and I haven't
> gotten around to looking at it. You can give that a shot if you can
> apply it to a recent tree.
> https://lore.kernel.org/lkml/20210304195101.3843496-1-sarava...@google.com/

I rebased it to driver-core-next and tested the patch  (for
correctness, not with your issue though). I'm fairly sure it should
help with your issue. Can you give it a shot please?

https://lore.kernel.org/lkml/20220705083934.3974140-1-sarava...@google.com/T/#u

-Saravana

>
> After looking at that old patch again, I think I know what's going on.
> For normal devices, the pm domain attach happens AFTER the device is
> added and fw_devlink has had a chance to set up device links. And if
> the suppliers aren't ready, really_probe() won't get as far as
> dev_pm_domain_attach(). But for amba, the clock and pm domain
> suppliers are "grabbed" before adding the device.
>
> So with that old patch + always returning -EPROBE_DEFER in
> amba_device_add() if amba_read_periphid() fails should fix your issue.
>
> -Saravana
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v7 20/21] PCI/P2PDMA: Introduce pci_mmap_p2pmem()

2022-07-05 Thread Christoph Hellwig
On Wed, Jun 29, 2022 at 02:59:06PM -0300, Jason Gunthorpe wrote:
> I've tried in the past, this is not a good idea. There is no way to
> handle failures when a VMA is dup'd and if you rely on private_data
> you almost certainly have to alloc here.
> 
> Then there is the issue of making the locking work on invalidation
> which is crazy ugly.
> 
> > I was not a fan of the extra code for this either, but I was given to
> > understand that it was the standard way to collect and cleanup VMAs.
> 
> Christoph you tried tried to clean it once globally, what happened to
> that?

Al pointed out that there are various places that rely on having a
separate file system.  I might be able to go back to it and see
if we could at least do it for some users.

But what also really matters here:  I don't want every user that
wants to be able to mmap a character device to do all this work.
The layering is simply wrong, it needs some character device
based helpers, not be open code everywhere.

In fact I'm not even sure this should be a character device, it seems
to fit it way better with the PCI sysfs hierchacy, just like how we
map MMIO resources, which these are anyway.  And once it is on sysfs
we do have a uniqueue inode and need none of the pseudofs stuff, and
don't need all the glue code in nvme either.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH kernel] powerpc/iommu: Add simple iommu_ops to report capabilities

2022-07-05 Thread Alexey Kardashevskiy
Historically PPC64 managed to avoid using iommu_ops. The VFIO driver
uses a SPAPR TCE sub-driver and all iommu_ops uses were kept in
the Type1 VFIO driver. Recent development though has added a coherency
capability check to the generic part of VFIO and essentially disabled
VFIO on PPC64.

This adds a simple iommu_ops stub which reports support for cache
coherency. Because bus_set_iommu() triggers IOMMU probing of PCI devices,
this provides minimum code for the probing to not crash.

The previous discussion is here:
https://patchwork.ozlabs.org/project/kvm-ppc/patch/20220701061751.1955857-1-...@ozlabs.ru/

Fixes: e8ae0e140c05 ("vfio: Require that devices support DMA cache coherence")
Fixes: 70693f470848 ("vfio: Set DMA ownership for VFIO devices")
Signed-off-by: Alexey Kardashevskiy 
---

I have not looked into the domains for ages, what is missing here? With this
on top of 5.19-rc1 VFIO works again on my POWER9 box. Thanks,

---
 arch/powerpc/include/asm/iommu.h |  2 +
 arch/powerpc/kernel/iommu.c  | 70 
 arch/powerpc/kernel/pci_64.c |  3 ++
 3 files changed, 75 insertions(+)

diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index 7e29c73e3dd4..4bdae0ee29d0 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -215,6 +215,8 @@ extern long iommu_tce_xchg_no_kill(struct mm_struct *mm,
enum dma_data_direction *direction);
 extern void iommu_tce_kill(struct iommu_table *tbl,
unsigned long entry, unsigned long pages);
+
+extern const struct iommu_ops spapr_tce_iommu_ops;
 #else
 static inline void iommu_register_group(struct iommu_table_group *table_group,
int pci_domain_number,
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 7e56ddb3e0b9..2205b448f7d5 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -1176,4 +1176,74 @@ void iommu_del_device(struct device *dev)
iommu_group_remove_device(dev);
 }
 EXPORT_SYMBOL_GPL(iommu_del_device);
+
+/*
+ * A simple iommu_ops to allow less cruft in generic VFIO code.
+ */
+static bool spapr_tce_iommu_capable(enum iommu_cap cap)
+{
+   switch (cap) {
+   case IOMMU_CAP_CACHE_COHERENCY:
+   return true;
+   default:
+   break;
+   }
+
+   return false;
+}
+
+static struct iommu_domain *spapr_tce_iommu_domain_alloc(unsigned int type)
+{
+   struct iommu_domain *domain = kzalloc(sizeof(*domain), GFP_KERNEL);
+
+   if (!domain)
+   return NULL;
+
+   domain->geometry.aperture_start = 0;
+   domain->geometry.aperture_end = ~0ULL;
+   domain->geometry.force_aperture = true;
+
+   return domain;
+}
+
+static struct iommu_device *spapr_tce_iommu_probe_device(struct device *dev)
+{
+   struct iommu_device *iommu_dev = kzalloc(sizeof(struct iommu_device), 
GFP_KERNEL);
+
+   iommu_dev->dev = dev;
+   iommu_dev->ops = _tce_iommu_ops;
+
+   return iommu_dev;
+}
+
+static void spapr_tce_iommu_release_device(struct device *dev)
+{
+}
+
+static int spapr_tce_iommu_attach_dev(struct iommu_domain *dom,
+ struct device *dev)
+{
+   return 0;
+}
+
+static struct iommu_group *spapr_tce_iommu_device_group(struct device *dev)
+{
+   struct iommu_group *grp = dev->iommu_group;
+
+   if (!grp)
+   grp = ERR_PTR(-ENODEV);
+   return grp;
+}
+
+const struct iommu_ops spapr_tce_iommu_ops = {
+   .capable = spapr_tce_iommu_capable,
+   .domain_alloc = spapr_tce_iommu_domain_alloc,
+   .probe_device = spapr_tce_iommu_probe_device,
+   .release_device = spapr_tce_iommu_release_device,
+   .device_group = spapr_tce_iommu_device_group,
+   .default_domain_ops = &(const struct iommu_domain_ops) {
+   .attach_dev = spapr_tce_iommu_attach_dev,
+   }
+};
+
 #endif /* CONFIG_IOMMU_API */
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index 19b03ddf5631..04bc0c52e45c 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -20,6 +20,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -27,6 +28,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /* pci_io_base -- the base address from which io bars are offsets.
  * This is the lowest I/O base address (so bar values are always positive),
@@ -69,6 +71,7 @@ static int __init pcibios_init(void)
ppc_md.pcibios_fixup();
 
printk(KERN_DEBUG "PCI: Probing PCI hardware done\n");
+   bus_set_iommu(_bus_type, _tce_iommu_ops);
 
return 0;
 }
-- 
2.30.2

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu