RE: [PATCH 4/4] iommu/vt-d: Add page response ops support
> From: Lu Baolu > Sent: Sunday, June 28, 2020 8:34 AM > > After a page request is handled, software must response the device which > raised the page request with the handling result. This is done through > the iommu ops.page_response if the request was reported to outside of > vendor iommu driver through iommu_report_device_fault(). This adds the > VT-d implementation of page_response ops. > > Co-developed-by: Jacob Pan > Signed-off-by: Jacob Pan > Co-developed-by: Liu Yi L > Signed-off-by: Liu Yi L > Signed-off-by: Lu Baolu > --- > drivers/iommu/intel/iommu.c | 1 + > drivers/iommu/intel/svm.c | 73 > + > include/linux/intel-iommu.h | 3 ++ > 3 files changed, 77 insertions(+) > > diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c > index de17952ed133..7eb29167e8f9 100644 > --- a/drivers/iommu/intel/iommu.c > +++ b/drivers/iommu/intel/iommu.c > @@ -6057,6 +6057,7 @@ const struct iommu_ops intel_iommu_ops = { > .sva_bind = intel_svm_bind, > .sva_unbind = intel_svm_unbind, > .sva_get_pasid = intel_svm_get_pasid, > + .page_response = intel_svm_page_response, > #endif > }; > > diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c > index 4800bb6f8794..003ea9579632 100644 > --- a/drivers/iommu/intel/svm.c > +++ b/drivers/iommu/intel/svm.c > @@ -1092,3 +1092,76 @@ int intel_svm_get_pasid(struct iommu_sva *sva) > > return pasid; > } > + > +int intel_svm_page_response(struct device *dev, > + struct iommu_fault_event *evt, > + struct iommu_page_response *msg) > +{ > + struct iommu_fault_page_request *prm; > + struct intel_svm_dev *sdev; > + struct intel_iommu *iommu; > + struct intel_svm *svm; > + bool private_present; > + bool pasid_present; > + bool last_page; > + u8 bus, devfn; > + int ret = 0; > + u16 sid; > + > + if (!dev || !dev_is_pci(dev)) > + return -ENODEV; > + > + iommu = device_to_iommu(dev, &bus, &devfn); > + if (!iommu) > + return -ENODEV; move to the place when iommu is referenced. This place is too early. 
> + > + if (!msg || !evt) > + return -EINVAL; > + > + mutex_lock(&pasid_mutex); > + > + prm = &evt->fault.prm; > + sid = PCI_DEVID(bus, devfn); > + pasid_present = prm->flags & > IOMMU_FAULT_PAGE_REQUEST_PASID_VALID; > + private_present = prm->flags & > IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA; > + last_page = prm->flags & > IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE; > + > + if (pasid_present) { > + /* VT-d supports devices with full 20 bit PASIDs only */ > + if (pci_max_pasids(to_pci_dev(dev)) != PASID_MAX) { > + ret = -EINVAL; > + goto out; > + } shouldn't we check prm->pasid here? Above is more reasonable to be checked when page request is reported. > + > + ret = pasid_to_svm_sdev(dev, prm->pasid, &svm, &sdev); > + if (ret || !sdev) if sdev==NULL, suppose an error (-ENODEV) should be returned here? > + goto out; > + } > + > + /* > + * Per VT-d spec. v3.0 ch7.7, system software must respond > + * with page group response if private data is present (PDP) > + * or last page in group (LPIG) bit is set. This is an > + * additional VT-d feature beyond PCI ATS spec. 
feature->requirement Thanks Kevin > + */ > + if (last_page || private_present) { > + struct qi_desc desc; > + > + desc.qw0 = QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid) > | > + QI_PGRP_PASID_P(pasid_present) | > + QI_PGRP_PDP(private_present) | > + QI_PGRP_RESP_CODE(msg->code) | > + QI_PGRP_RESP_TYPE; > + desc.qw1 = QI_PGRP_IDX(prm->grpid) | > QI_PGRP_LPIG(last_page); > + desc.qw2 = 0; > + desc.qw3 = 0; > + if (private_present) > + memcpy(&desc.qw2, prm->private_data, > +sizeof(prm->private_data)); > + > + qi_submit_sync(iommu, &desc, 1, 0); > + } > +out: > + mutex_unlock(&pasid_mutex); > + return ret; > +} > diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h > index fc2cfc3db6e1..bf6009a344f5 100644 > --- a/include/linux/intel-iommu.h > +++ b/include/linux/intel-iommu.h > @@ -741,6 +741,9 @@ struct iommu_sva *intel_svm_bind(struct device > *dev, struct mm_struct *mm, >void *drvdata); > void intel_svm_unbind(struct iommu_sva *handle); > int intel_svm_get_pasid(struct iommu_sva *handle); > +int intel_svm_page_response(struct device *dev, struct iommu_fault_event > *evt, > + struct iommu_page_response *msg); > + > struct svm_dev_ops; > >
RE: [PATCH 3/4] iommu/vt-d: Report page request faults for guest SVA
> From: Lu Baolu > Sent: Sunday, June 28, 2020 8:34 AM > > A pasid might be bound to a page table from a VM guest via the iommu > ops.sva_bind_gpasid. In this case, when a DMA page fault is detected > on the physical IOMMU, we need to inject the page fault request into > the guest. After the guest completes handling the page fault, a page > response need to be sent back via the iommu ops.page_response(). > > This adds support to report a page request fault. Any external module > which is interested in handling this fault should regiester a notifier > callback. > > Co-developed-by: Jacob Pan > Signed-off-by: Jacob Pan > Co-developed-by: Liu Yi L > Signed-off-by: Liu Yi L > Signed-off-by: Lu Baolu > --- > drivers/iommu/intel/svm.c | 83 > +-- > 1 file changed, 80 insertions(+), 3 deletions(-) > > diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c > index c23167877b2b..4800bb6f8794 100644 > --- a/drivers/iommu/intel/svm.c > +++ b/drivers/iommu/intel/svm.c > @@ -815,6 +815,69 @@ static void intel_svm_drain_prq(struct device *dev, > int pasid) > } > } > > +static int prq_to_iommu_prot(struct page_req_dsc *req) > +{ > + int prot = 0; > + > + if (req->rd_req) > + prot |= IOMMU_FAULT_PERM_READ; > + if (req->wr_req) > + prot |= IOMMU_FAULT_PERM_WRITE; > + if (req->exe_req) > + prot |= IOMMU_FAULT_PERM_EXEC; > + if (req->pm_req) > + prot |= IOMMU_FAULT_PERM_PRIV; > + > + return prot; > +} > + > +static int > +intel_svm_prq_report(struct intel_iommu *iommu, struct page_req_dsc > *desc) > +{ > + struct iommu_fault_event event; > + struct pci_dev *pdev; > + u8 bus, devfn; > + int ret = 0; > + > + memset(&event, 0, sizeof(struct iommu_fault_event)); > + bus = PCI_BUS_NUM(desc->rid); > + devfn = desc->rid & 0xff; > + pdev = pci_get_domain_bus_and_slot(iommu->segment, bus, devfn); Is this step necessary? dev can be passed in (based on sdev), and more importantly iommu_report_device_fault already handles the ref counting e.g. 
get_device(dev) when fault handler is valid... > + > + if (!pdev) { > + pr_err("No PCI device found for PRQ [%02x:%02x.%d]\n", > +bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); > + return -ENODEV; > + } > + > + /* Fill in event data for device specific processing */ > + event.fault.type = IOMMU_FAULT_PAGE_REQ; > + event.fault.prm.addr = desc->addr; > + event.fault.prm.pasid = desc->pasid; > + event.fault.prm.grpid = desc->prg_index; > + event.fault.prm.perm = prq_to_iommu_prot(desc); > + > + /* > + * Set last page in group bit if private data is present, > + * page response is required as it does for LPIG. > + */ > + if (desc->lpig) > + event.fault.prm.flags |= > IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE; > + if (desc->pasid_present) > + event.fault.prm.flags |= > IOMMU_FAULT_PAGE_REQUEST_PASID_VALID; > + if (desc->priv_data_present) { > + event.fault.prm.flags |= > IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE; why setting lpig under this condition? > + event.fault.prm.flags |= > IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA; > + memcpy(event.fault.prm.private_data, desc->priv_data, > +sizeof(desc->priv_data)); > + } > + > + ret = iommu_report_device_fault(&pdev->dev, &event); > + pci_dev_put(pdev); > + > + return ret; > +} > + > static irqreturn_t prq_event_thread(int irq, void *d) > { > struct intel_iommu *iommu = d; > @@ -874,6 +937,19 @@ static irqreturn_t prq_event_thread(int irq, void *d) > if (!is_canonical_address(address)) > goto bad_req; > > + /* > + * If prq is to be handled outside iommu driver via receiver of > + * the fault notifiers, we skip the page response here. 
> + */ > + if (svm->flags & SVM_FLAG_GUEST_MODE) { > + int res = intel_svm_prq_report(iommu, req); > + > + if (!res) > + goto prq_advance; > + else > + goto bad_req; > + } > + I noted in bad_req there is another reporting logic: if (sdev && sdev->ops && sdev->ops->fault_cb) { int rwxp = (req->rd_req << 3) | (req->wr_req << 2) | (req->exe_req << 1) | (req->pm_req); sdev->ops->fault_cb(sdev->dev, req->pasid, req->addr, req->priv_data, rwxp, result); } It was introduced in the 1st version of svm.c. It might be unrelated to this patch, but I wonder whether that one should be replaced with iommu_report_device_fault too? Thanks Kevin > /* If the mm is already defunct, don't handle fault
[PATCH v2 5/7] driver core: Add device location to "struct device" and expose it in sysfs
Add a new (optional) field to denote the physical location of a device in the system, and expose it in sysfs. This was discussed here: https://lore.kernel.org/linux-acpi/20200618184621.ga446...@kroah.com/ (The primary choice for attribute name i.e. "location" is already exposed as an ABI elsewhere, so settled for "site"). Individual buses that want to support this new attribute can opt-in by setting a flag in bus_type, and then populating the location of device while enumerating it. Signed-off-by: Rajat Jain --- v2: (Initial version) drivers/base/core.c| 35 +++ include/linux/device.h | 42 ++ include/linux/device/bus.h | 8 3 files changed, 85 insertions(+) diff --git a/drivers/base/core.c b/drivers/base/core.c index 67d39a90b45c7..14c815526b7fa 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -1778,6 +1778,32 @@ static ssize_t online_store(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR_RW(online); +static ssize_t site_show(struct device *dev, struct device_attribute *attr, +char *buf) +{ + const char *site; + + device_lock(dev); + switch (dev->site) { + case SITE_INTERNAL: + site = "INTERNAL"; + break; + case SITE_EXTENDED: + site = "EXTENDED"; + break; + case SITE_EXTERNAL: + site = "EXTERNAL"; + break; + case SITE_UNKNOWN: + default: + site = "UNKNOWN"; + break; + } + device_unlock(dev); + return sprintf(buf, "%s\n", site); +} +static DEVICE_ATTR_RO(site); + int device_add_groups(struct device *dev, const struct attribute_group **groups) { return sysfs_create_groups(&dev->kobj, groups); @@ -1949,8 +1975,16 @@ static int device_add_attrs(struct device *dev) goto err_remove_dev_groups; } + if (bus_supports_site(dev->bus)) { + error = device_create_file(dev, &dev_attr_site); + if (error) + goto err_remove_dev_attr_online; + } + return 0; + err_remove_dev_attr_online: + device_remove_file(dev, &dev_attr_online); err_remove_dev_groups: device_remove_groups(dev, dev->groups); err_remove_type_groups: @@ -1968,6 +2002,7 @@ static void 
device_remove_attrs(struct device *dev) struct class *class = dev->class; const struct device_type *type = dev->type; + device_remove_file(dev, &dev_attr_site); device_remove_file(dev, &dev_attr_online); device_remove_groups(dev, dev->groups); diff --git a/include/linux/device.h b/include/linux/device.h index 15460a5ac024a..a4143735ae712 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -428,6 +428,31 @@ enum dl_dev_state { DL_DEV_UNBINDING, }; +/** + * enum device_site - Physical location of the device in the system. + * The semantics of values depend on subsystem / bus: + * + * @SITE_UNKNOWN: Location is Unknown (default) + * + * @SITE_INTERNAL: Device is internal to the system, and cannot be (easily) + * removed. E.g. SoC internal devices, onboard soldered + * devices, internal M.2 cards (that cannot be removed + * without opening the chassis). + * @SITE_EXTENDED: Device sits an extension of the system. E.g. devices + * on external PCIe trays, docking stations etc. These + * devices may be removable, but are generally housed + * internally on an extension board, so they are removed + * only when that whole extension board is removed. + * @SITE_EXTERNAL: Devices truly external to the system (i.e. plugged on + * an external port) that may be removed or added frequently. + */ +enum device_site { + SITE_UNKNOWN = 0, + SITE_INTERNAL, + SITE_EXTENDED, + SITE_EXTERNAL, +}; + /** * struct dev_links_info - Device data related to device links. * @suppliers: List of links to supplier devices. @@ -513,6 +538,7 @@ struct dev_links_info { * device (i.e. the bus driver that discovered the device). * @iommu_group: IOMMU group the device belongs to. * @iommu: Per device generic IOMMU runtime data + * @site: Physical location of the device w.r.t. the system * * @offline_disabled: If set, the device is permanently online. * @offline: Set after successful invocation of bus type's .offline(). 
@@ -613,6 +639,8 @@ struct device { struct iommu_group *iommu_group; struct dev_iommu*iommu; + enum device_sitesite; /* Device physical location */ + booloffline_disabled:1; booloffline:1; boolof_node_reused:1; @@ -806,6 +834,20 @@ static inline bool dev_has_
[PATCH v2 7/7] PCI: Add parameter to disable attaching external devices
Introduce a PCI parameter that disables the automatic attachment of external devices to their drivers. This is needed to allow an admin to control which drivers he wants to allow on external ports. For more context, see threads at: https://lore.kernel.org/linux-pci/20200609210400.GA1461839@bjorn-Precision-5520/ https://lore.kernel.org/linux-pci/cack8z6h-dzqybmqtu5_h5ttwwn35q7yysm9a7wj0twfqp8q...@mail.gmail.com/ drivers_autoprobe can only be disabled after userspace comes up. So any external devices that were plugged in before boot may still bind to drivers before userspace gets a chance to clear drivers_autoprobe. Another problem is that even with drivers_autoprobe=0, the hot-added PCI devices are bound to drivers because PCI explicitly calls device_attach() asking driver core to find and attach a driver. This patch helps with both of these problems. Signed-off-by: Rajat Jain --- v2: Use the newly introduced dev_is_external() from device core commit log elaborated drivers/pci/bus.c | 11 --- drivers/pci/pci.c | 9 + drivers/pci/pci.h | 1 + 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 3cef835b375fd..c11725bccffb0 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -321,9 +321,14 @@ void pci_bus_add_device(struct pci_dev *dev) pci_bridge_d3_update(dev); dev->match_driver = true; - retval = device_attach(&dev->dev); - if (retval < 0 && retval != -EPROBE_DEFER) - pci_warn(dev, "device attach failed (%d)\n", retval); + + if (pci_dont_attach_external_devs && dev_is_external(&dev->dev)) { + pci_info(dev, "not attaching external device\n"); + } else { + retval = device_attach(&dev->dev); + if (retval < 0 && retval != -EPROBE_DEFER) + pci_warn(dev, "device attach failed (%d)\n", retval); + } pci_dev_assign_added(dev, true); } diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 35f25ac39167b..3ebcfa8b33178 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -128,6 +128,13 @@ static bool 
pcie_ats_disabled; /* If set, the PCI config space of each device is printed during boot. */ bool pci_early_dump; +/* + * If set, the devices behind external-facing bridges (as marked by firmware) + * shall not be attached automatically. Userspace will need to attach them + * manually: echo > /sys/bus/pci/drivers//bind + */ +bool pci_dont_attach_external_devs; + bool pci_ats_disabled(void) { return pcie_ats_disabled; @@ -6539,6 +6546,8 @@ static int __init pci_setup(char *str) pci_add_flags(PCI_SCAN_ALL_PCIE_DEVS); } else if (!strncmp(str, "disable_acs_redir=", 18)) { disable_acs_redir_param = str + 18; + } else if (!strcmp(str, "dont_attach_external_devs")) { + pci_dont_attach_external_devs = true; } else { pr_err("PCI: Unknown option `%s'\n", str); } diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 12fb79fbe29d3..875fecb9b2612 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -13,6 +13,7 @@ extern const unsigned char pcie_link_speed[]; extern bool pci_early_dump; +extern bool pci_dont_attach_external_devs; bool pcie_cap_has_lnkctl(const struct pci_dev *dev); bool pcie_cap_has_rtctl(const struct pci_dev *dev); -- 2.27.0.212.ge8ba1cc988-goog ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH net] xsk: remove cheap_dma optimization
On Mon, Jun 29, 2020 at 05:18:38PM +0200, Daniel Borkmann wrote: > On 6/29/20 5:10 PM, Björn Töpel wrote: >> On 2020-06-29 15:52, Daniel Borkmann wrote: >>> >>> Ok, fair enough, please work with DMA folks to get this properly integrated >>> and >>> restored then. Applied, thanks! >> >> Daniel, you were too quick! Please revert this one; Christoph just submitted >> a 4-patch-series that addresses both the DMA API, and the perf regression! > > Nice, tossed from bpf tree then! (Looks like it didn't land on the bpf list > yet, > but seems other mails are currently stuck as well on vger. I presume it will > be > routed to Linus via Christoph?) I sent the patches to the bpf list, did you get them now that vger is unclogged? Thinking about it, the best route might be through bpf/net, so if that works for you please pick it up. ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 3/7] iommu/vt-d: Fix PASID devTLB invalidation
On Tue, 30 Jun 2020 03:01:29 + "Tian, Kevin" wrote: > > From: Lu Baolu > > Sent: Thursday, June 25, 2020 3:26 PM > > > > On 2020/6/23 23:43, Jacob Pan wrote: > > > DevTLB flush can be used for both DMA request with and without > > > PASIDs. The former uses PASID#0 (RID2PASID), latter uses non-zero > > > PASID for SVA usage. > > > > > > This patch adds a check for PASID value such that devTLB flush > > > with PASID is used for SVA case. This is more efficient in that > > > multiple PASIDs can be used by a single device, when tearing down > > > a PASID entry we shall flush only the devTLB specific to a PASID. > > > > > > Fixes: 6f7db75e1c46 ("iommu/vt-d: Add second level page table") > > btw is it really a fix? From the description it's more like an > optimization... > I guess it depends on how the issue is perceived. There is no functional problem but the flush is too coarse w/o this patch. > > > Signed-off-by: Jacob Pan > > > --- > > > drivers/iommu/intel/pasid.c | 11 ++- > > > 1 file changed, 10 insertions(+), 1 deletion(-) > > > > > > diff --git a/drivers/iommu/intel/pasid.c > > > b/drivers/iommu/intel/pasid.c index c81f0f17c6ba..3991a24539a1 > > > 100644 --- a/drivers/iommu/intel/pasid.c > > > +++ b/drivers/iommu/intel/pasid.c > > > @@ -486,7 +486,16 @@ devtlb_invalidation_with_pasid(struct > > intel_iommu *iommu, > > > qdep = info->ats_qdep; > > > pfsid = info->pfsid; > > > > > > - qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - > > > VTD_PAGE_SHIFT); > > > + /* > > > + * When PASID 0 is used, it indicates RID2PASID(DMA > > > request w/o > > PASID), > > > + * devTLB flush w/o PASID should be used. For non-zero > > > PASID under > > > + * SVA usage, device could do DMA with multiple PASIDs. > > > It is more > > > + * efficient to flush devTLB specific to the PASID. 
> > > + */ > > > + if (pasid) > > > > How about > > > > if (pasid == PASID_RID2PASID) > > qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - > > VTD_PAGE_SHIFT); > > else > > qi_flush_dev_iotlb_pasid(iommu, sid, pfsid, pasid, > > qdep, 0, 64 - > > VTD_PAGE_SHIFT); > > > > ? > > > > It makes the code more readable and still works even we reassign > > another pasid for RID2PASID. > > > > Best regards, > > baolu > > > > > + qi_flush_dev_iotlb_pasid(iommu, sid, pfsid, > > > pasid, qdep, 0, > > 64 - VTD_PAGE_SHIFT); > > > + else > > > + qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, > > > 64 - > > VTD_PAGE_SHIFT); > > > } > > > > > > void intel_pasid_tear_down_entry(struct intel_iommu *iommu, > > > struct > > device *dev, > > > [Jacob Pan] ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v2 3/7] PCI/ACS: Enable PCI_ACS_TB for untrusted/external-facing devices
When enabling ACS, enable translation blocking for external facing ports and untrusted devices. Signed-off-by: Rajat Jain --- v2: Commit log change drivers/pci/pci.c| 4 drivers/pci/quirks.c | 11 +++ 2 files changed, 15 insertions(+) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index d2ff987585855..79853b52658a2 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -3330,6 +3330,10 @@ static void pci_std_enable_acs(struct pci_dev *dev) /* Upstream Forwarding */ ctrl |= (cap & PCI_ACS_UF); + if (dev->external_facing || dev->untrusted) + /* Translation Blocking */ + ctrl |= (cap & PCI_ACS_TB); + pci_write_config_word(dev, pos + PCI_ACS_CTRL, ctrl); } diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index b341628e47527..6294adeac4049 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -4934,6 +4934,13 @@ static void pci_quirk_enable_intel_rp_mpc_acs(struct pci_dev *dev) } } +/* + * Currently this quirk does the equivalent of + * PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF | PCI_ACS_SV + * + * Currently missing, it also needs to do equivalent of PCI_ACS_TB, + * if dev->external_facing || dev->untrusted + */ static int pci_quirk_enable_intel_pch_acs(struct pci_dev *dev) { if (!pci_quirk_intel_pch_acs_match(dev)) @@ -4973,6 +4980,10 @@ static int pci_quirk_enable_intel_spt_pch_acs(struct pci_dev *dev) ctrl |= (cap & PCI_ACS_CR); ctrl |= (cap & PCI_ACS_UF); + if (dev->external_facing || dev->untrusted) + /* Translation Blocking */ + ctrl |= (cap & PCI_ACS_TB); + pci_write_config_dword(dev, pos + INTEL_SPT_ACS_CTRL, ctrl); pci_info(dev, "Intel SPT PCH root port ACS workaround enabled\n"); -- 2.27.0.212.ge8ba1cc988-goog ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v2 4/7] PCI: Add device even if driver attach failed
device_attach() returning failure indicates a driver error while trying to probe the device. In such a scenario, the PCI device should still be added in the system and be visible to the user. This patch partially reverts: commit ab1a187bba5c ("PCI: Check device_attach() return value always") Signed-off-by: Rajat Jain Reviewed-by: Greg Kroah-Hartman --- v2: Cosmetic change in commit log. Add Greg's "reviewed-by" drivers/pci/bus.c | 6 +- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 8e40b3e6da77d..3cef835b375fd 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -322,12 +322,8 @@ void pci_bus_add_device(struct pci_dev *dev) dev->match_driver = true; retval = device_attach(&dev->dev); - if (retval < 0 && retval != -EPROBE_DEFER) { + if (retval < 0 && retval != -EPROBE_DEFER) pci_warn(dev, "device attach failed (%d)\n", retval); - pci_proc_detach_device(dev); - pci_remove_sysfs_dev_files(dev); - return; - } pci_dev_assign_added(dev, true); } -- 2.27.0.212.ge8ba1cc988-goog ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v2 6/7] PCI: Move pci_dev->untrusted logic to use device location instead
The firmware was provinding "ExternalFacing" attribute on PCI root ports, to allow the kernel to mark devices behind it as external. Note that the firmware provides an immutable, read-only property, i.e. the location of the device. The use of (external) device location as hint for (dis)trust, is a decision that IOMMU drivers have taken, so we should call it out explicitly. This patch removes the pci_dev->untrusted and changes the users of it to use device core provided device location instead. This location is populated by PCI using the same "ExternalFacing" firmware info. Any device not behind the "ExternalFacing" bridges are marked internal and the ones behind such bridges are markes external. Signed-off-by: Rajat Jain --- v2: (Initial version) drivers/iommu/intel/iommu.c | 31 +-- drivers/pci/ats.c | 2 +- drivers/pci/pci-driver.c| 1 + drivers/pci/pci.c | 2 +- drivers/pci/probe.c | 18 -- drivers/pci/quirks.c| 2 +- include/linux/pci.h | 10 +- 7 files changed, 38 insertions(+), 28 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 1ccb224f82496..ca66a196f5e97 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -168,6 +168,22 @@ static inline unsigned long virt_to_dma_pfn(void *p) return page_to_dma_pfn(virt_to_page(p)); } +static inline bool untrusted_dev(struct device *dev) +{ + /* +* Treat all external PCI devices as untrusted devices. These are the +* devices behing marked behind external-facing bridges as marked by +* the firmware. The untrusted devices are the ones that can potentially +* execute DMA attacks and similar. They are typically connected through +* external thunderbolt ports. When an IOMMU is enabled they should be +* getting full mappings to ensure they cannot access arbitrary memory. 
+*/ + if (dev_is_pci(dev) && dev_is_external(dev)) + return true; + + return false; +} + /* global iommu list, set NULL for ignored DMAR units */ static struct intel_iommu **g_iommus; @@ -383,8 +399,7 @@ struct device_domain_info *get_domain_info(struct device *dev) DEFINE_SPINLOCK(device_domain_lock); static LIST_HEAD(device_domain_list); -#define device_needs_bounce(d) (!intel_no_bounce && dev_is_pci(d) && \ - to_pci_dev(d)->untrusted) +#define device_needs_bounce(d) (!intel_no_bounce && untrusted_dev(d)) /* * Iterate over elements in device_domain_list and call the specified @@ -2830,7 +2845,7 @@ static int device_def_domain_type(struct device *dev) * Prevent any device marked as untrusted from getting * placed into the statically identity mapping domain. */ - if (pdev->untrusted) + if (untrusted_dev(dev)) return IOMMU_DOMAIN_DMA; if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev)) @@ -3464,7 +3479,6 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size) unsigned long iova_pfn; struct intel_iommu *iommu; struct page *freelist; - struct pci_dev *pdev = NULL; domain = find_domain(dev); BUG_ON(!domain); @@ -3477,11 +3491,8 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size) start_pfn = mm_to_dma_pfn(iova_pfn); last_pfn = start_pfn + nrpages - 1; - if (dev_is_pci(dev)) - pdev = to_pci_dev(dev); - freelist = domain_unmap(domain, start_pfn, last_pfn); - if (intel_iommu_strict || (pdev && pdev->untrusted) || + if (intel_iommu_strict || untrusted_dev(dev) || !has_iova_flush_queue(&domain->iovad)) { iommu_flush_iotlb_psi(iommu, domain, start_pfn, nrpages, !freelist, 0); @@ -4743,7 +4754,7 @@ static inline bool has_untrusted_dev(void) struct pci_dev *pdev = NULL; for_each_pci_dev(pdev) - if (pdev->untrusted || pdev->external_facing) + if (pdev->external_facing || untrusted_dev(&pdev->dev)) return true; return false; @@ -6036,7 +6047,7 @@ intel_iommu_domain_set_attr(struct iommu_domain *domain, */ static 
bool risky_device(struct pci_dev *pdev) { - if (pdev->untrusted) { + if (untrusted_dev(&pdev->dev)) { pci_info(pdev, "Skipping IOMMU quirk for dev [%04X:%04X] on untrusted PCI link\n", pdev->vendor, pdev->device); diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c index b761c1f72f672..ebd370f4d5b06 100644 --- a/drivers/pci/ats.c +++ b/drivers/pci/ats.c @@ -42,7 +42,7 @@ bool pci_ats_supported(struct pci_dev *dev) if (!dev->ats_cap) return fa
[PATCH v2 2/7] PCI: Set "untrusted" flag for truly external devices only
The "ExternalFacing" devices (root ports) are still internal devices that sit on the internal system fabric and thus trusted. Currently they were being marked untrusted. This patch uses the platform flag to identify the external facing devices and then use it to mark any downstream devices as "untrusted". The external-facing devices themselves are left as "trusted". This was discussed here: https://lkml.org/lkml/2020/6/10/1049 Signed-off-by: Rajat Jain --- v2: cosmetic changes in commit log drivers/iommu/intel/iommu.c | 2 +- drivers/pci/of.c| 2 +- drivers/pci/pci-acpi.c | 13 +++-- drivers/pci/probe.c | 2 +- include/linux/pci.h | 8 5 files changed, 18 insertions(+), 9 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index d759e7234e982..1ccb224f82496 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4743,7 +4743,7 @@ static inline bool has_untrusted_dev(void) struct pci_dev *pdev = NULL; for_each_pci_dev(pdev) - if (pdev->untrusted) + if (pdev->untrusted || pdev->external_facing) return true; return false; diff --git a/drivers/pci/of.c b/drivers/pci/of.c index 27839cd2459f6..22727fc9558df 100644 --- a/drivers/pci/of.c +++ b/drivers/pci/of.c @@ -42,7 +42,7 @@ void pci_set_bus_of_node(struct pci_bus *bus) } else { node = of_node_get(bus->self->dev.of_node); if (node && of_property_read_bool(node, "external-facing")) - bus->self->untrusted = true; + bus->self->external_facing = true; } bus->dev.of_node = node; diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index 7224b1e5f2a83..492c07805caf8 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -1213,22 +1213,23 @@ static void pci_acpi_optimize_delay(struct pci_dev *pdev, ACPI_FREE(obj); } -static void pci_acpi_set_untrusted(struct pci_dev *dev) +static void pci_acpi_set_external_facing(struct pci_dev *dev) { u8 val; - if (pci_pcie_type(dev) != PCI_EXP_TYPE_ROOT_PORT) + if (pci_pcie_type(dev) != PCI_EXP_TYPE_ROOT_PORT && + 
pci_pcie_type(dev) != PCI_EXP_TYPE_DOWNSTREAM) return; if (device_property_read_u8(&dev->dev, "ExternalFacingPort", &val)) return; /* -* These root ports expose PCIe (including DMA) outside of the -* system so make sure we treat them and everything behind as +* These root/down ports expose PCIe (including DMA) outside of the +* system so make sure we treat everything behind them as * untrusted. */ if (val) - dev->untrusted = 1; + dev->external_facing = 1; } static void pci_acpi_setup(struct device *dev) @@ -1240,7 +1241,7 @@ static void pci_acpi_setup(struct device *dev) return; pci_acpi_optimize_delay(pci_dev, adev->handle); - pci_acpi_set_untrusted(pci_dev); + pci_acpi_set_external_facing(pci_dev); pci_acpi_add_edr_notifier(pci_dev); pci_acpi_add_pm_notifier(adev, pci_dev); diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 6d87066a5ecc5..8c40c00413e74 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1552,7 +1552,7 @@ static void set_pcie_untrusted(struct pci_dev *dev) * untrusted as well. */ parent = pci_upstream_bridge(dev); - if (parent && parent->untrusted) + if (parent && (parent->untrusted || parent->external_facing)) dev->untrusted = true; } diff --git a/include/linux/pci.h b/include/linux/pci.h index a26be5332bba6..fe1bc603fda40 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -432,6 +432,14 @@ struct pci_dev { * mappings to make sure they cannot access arbitrary memory. */ unsigned intuntrusted:1; + /* +* Devices are marked as external-facing using info from platform +* (ACPI / devicetree). An external-facing device is still an internal +* trusted device, but it faces external untrusted devices. Thus any +* devices enumerated downstream an external-facing device is marked +* as untrusted. 
+*/ + unsigned intexternal_facing:1; unsigned intbroken_intx_masking:1; /* INTx masking can't be used */ unsigned intio_window_1k:1; /* Intel bridge 1K I/O windows */ unsigned intirq_managed:1; -- 2.27.0.212.ge8ba1cc988-goog ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 3/7] iommu/vt-d: Fix PASID devTLB invalidation
On Thu, 25 Jun 2020 15:25:57 +0800 Lu Baolu wrote: > On 2020/6/23 23:43, Jacob Pan wrote: > > DevTLB flush can be used for both DMA request with and without > > PASIDs. The former uses PASID#0 (RID2PASID), latter uses non-zero > > PASID for SVA usage. > > > > This patch adds a check for PASID value such that devTLB flush with > > PASID is used for SVA case. This is more efficient in that multiple > > PASIDs can be used by a single device, when tearing down a PASID > > entry we shall flush only the devTLB specific to a PASID. > > > > Fixes: 6f7db75e1c46 ("iommu/vt-d: Add second level page table") > > Signed-off-by: Jacob Pan > > --- > > drivers/iommu/intel/pasid.c | 11 ++- > > 1 file changed, 10 insertions(+), 1 deletion(-) > > > > diff --git a/drivers/iommu/intel/pasid.c > > b/drivers/iommu/intel/pasid.c index c81f0f17c6ba..3991a24539a1 > > 100644 --- a/drivers/iommu/intel/pasid.c > > +++ b/drivers/iommu/intel/pasid.c > > @@ -486,7 +486,16 @@ devtlb_invalidation_with_pasid(struct > > intel_iommu *iommu, qdep = info->ats_qdep; > > pfsid = info->pfsid; > > > > - qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - > > VTD_PAGE_SHIFT); > > + /* > > +* When PASID 0 is used, it indicates RID2PASID(DMA > > request w/o PASID), > > +* devTLB flush w/o PASID should be used. For non-zero > > PASID under > > +* SVA usage, device could do DMA with multiple PASIDs. It > > is more > > +* efficient to flush devTLB specific to the PASID. > > +*/ > > + if (pasid) > > How about > > if (pasid == PASID_RID2PASID) > qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - > VTD_PAGE_SHIFT); else > qi_flush_dev_iotlb_pasid(iommu, sid, pfsid, pasid, > qdep, 0, 64 - VTD_PAGE_SHIFT); > > ? > > It makes the code more readable and still works even we reassign > another pasid for RID2PASID. > agreed, thanks. 
> Best regards, > baolu > > > + qi_flush_dev_iotlb_pasid(iommu, sid, pfsid, pasid, > > qdep, 0, 64 - VTD_PAGE_SHIFT); > > + else > > + qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 > > - VTD_PAGE_SHIFT); } > > > > void intel_pasid_tear_down_entry(struct intel_iommu *iommu, > > struct device *dev, [Jacob Pan] ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v2 1/7] PCI: Keep the ACS capability offset in device
Currently this is being looked up at a number of places. Read and store it once at bootup so that it can be used by all later. Signed-off-by: Rajat Jain --- v2: Commit log cosmetic changes drivers/pci/p2pdma.c | 2 +- drivers/pci/pci.c| 21 + drivers/pci/pci.h| 2 +- drivers/pci/probe.c | 2 +- drivers/pci/quirks.c | 8 include/linux/pci.h | 1 + 6 files changed, 25 insertions(+), 11 deletions(-) diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c index e8e444eeb1cd2..f29a48f8fa594 100644 --- a/drivers/pci/p2pdma.c +++ b/drivers/pci/p2pdma.c @@ -253,7 +253,7 @@ static int pci_bridge_has_acs_redir(struct pci_dev *pdev) int pos; u16 ctrl; - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ACS); + pos = pdev->acs_cap; if (!pos) return 0; diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index ce096272f52b1..d2ff987585855 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -51,6 +51,7 @@ EXPORT_SYMBOL(pci_pci_problems); unsigned int pci_pm_d3_delay; +static void pci_enable_acs(struct pci_dev *dev); static void pci_pme_list_scan(struct work_struct *work); static LIST_HEAD(pci_pme_list); @@ -3284,7 +3285,7 @@ static void pci_disable_acs_redir(struct pci_dev *dev) if (!pci_dev_specific_disable_acs_redir(dev)) return; - pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS); + pos = dev->acs_cap; if (!pos) { pci_warn(dev, "cannot disable ACS redirect for this hardware as it does not have ACS capabilities\n"); return; @@ -3310,7 +3311,7 @@ static void pci_std_enable_acs(struct pci_dev *dev) u16 cap; u16 ctrl; - pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS); + pos = dev->acs_cap; if (!pos) return; @@ -3336,7 +3337,7 @@ static void pci_std_enable_acs(struct pci_dev *dev) * pci_enable_acs - enable ACS if hardware support it * @dev: the PCI device */ -void pci_enable_acs(struct pci_dev *dev) +static void pci_enable_acs(struct pci_dev *dev) { if (!pci_acs_enable) goto disable_acs_redir; @@ -3362,7 +3363,7 @@ static bool pci_acs_flags_enabled(struct pci_dev 
*pdev, u16 acs_flags) int pos; u16 cap, ctrl; - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ACS); + pos = pdev->acs_cap; if (!pos) return false; @@ -3487,6 +3488,18 @@ bool pci_acs_path_enabled(struct pci_dev *start, return true; } +/** + * pci_acs_init - Initialize if hardware supports it + * @dev: the PCI device + */ +void pci_acs_init(struct pci_dev *dev) +{ + dev->acs_cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS); + + if (dev->acs_cap) + pci_enable_acs(dev); +} + /** * pci_rebar_find_pos - find position of resize ctrl reg for BAR * @pdev: PCI device diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 6d3f758671064..12fb79fbe29d3 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -532,7 +532,7 @@ static inline resource_size_t pci_resource_alignment(struct pci_dev *dev, return resource_alignment(res); } -void pci_enable_acs(struct pci_dev *dev); +void pci_acs_init(struct pci_dev *dev); #ifdef CONFIG_PCI_QUIRKS int pci_dev_specific_acs_enabled(struct pci_dev *dev, u16 acs_flags); int pci_dev_specific_enable_acs(struct pci_dev *dev); diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 2f66988cea257..6d87066a5ecc5 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -2390,7 +2390,7 @@ static void pci_init_capabilities(struct pci_dev *dev) pci_ats_init(dev); /* Address Translation Services */ pci_pri_init(dev); /* Page Request Interface */ pci_pasid_init(dev);/* Process Address Space ID */ - pci_enable_acs(dev);/* Enable ACS P2P upstream forwarding */ + pci_acs_init(dev); /* Access Control Services */ pci_ptm_init(dev); /* Precision Time Measurement */ pci_aer_init(dev); /* Advanced Error Reporting */ pci_dpc_init(dev); /* Downstream Port Containment */ diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 812bfc32ecb82..b341628e47527 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -4653,7 +4653,7 @@ static int pci_quirk_intel_spt_pch_acs(struct pci_dev *dev, u16 acs_flags) if 
(!pci_quirk_intel_spt_pch_acs_match(dev)) return -ENOTTY; - pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS); + pos = dev->acs_cap; if (!pos) return -ENOTTY; @@ -4961,7 +4961,7 @@ static int pci_quirk_enable_intel_spt_pch_acs(struct pci_dev *dev) if (!pci_quirk_intel_spt_pch_acs_match(dev)) return -ENOTTY; - pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_
[PATCH v2 0/7] Tighten PCI security, expose dev location in sysfs
This is a set of loosely related patches most of whom emerged out of discussion in the following threads. In a nutshell the goal was to allow an administrator to specify which driver he wants to allow on external ports, and a strategy was chalked out: https://lore.kernel.org/linux-pci/20200609210400.GA1461839@bjorn-Precision-5520/ https://lore.kernel.org/linux-pci/20200618184621.ga446...@kroah.com/ https://lore.kernel.org/linux-pci/20200627050225.ga226...@kroah.com/ * The first 3 patches tighten the PCI security using ACS, and take care of a border case. * The 4th patch takes care of PCI bug. * 5th and 6th patches expose a device's location into the sysfs to allow admin to make decision based on that. * 7th patch is to ensure that the external devices don't bind to drivers during boot. Rajat Jain (7): PCI: Keep the ACS capability offset in device PCI: Set "untrusted" flag for truly external devices only PCI/ACS: Enable PCI_ACS_TB for untrusted/external-facing devices PCI: Add device even if driver attach failed driver core: Add device location to "struct device" and expose it in sysfs PCI: Move pci_dev->untrusted logic to use device location instead PCI: Add parameter to disable attaching external devices drivers/base/core.c | 35 +++ drivers/iommu/intel/iommu.c | 31 ++- drivers/pci/ats.c | 2 +- drivers/pci/bus.c | 13 ++-- drivers/pci/of.c| 2 +- drivers/pci/p2pdma.c| 2 +- drivers/pci/pci-acpi.c | 13 ++-- drivers/pci/pci-driver.c| 1 + drivers/pci/pci.c | 34 ++ drivers/pci/pci.h | 3 ++- drivers/pci/probe.c | 20 +++--- drivers/pci/quirks.c| 19 + include/linux/device.h | 42 + include/linux/device/bus.h | 8 +++ include/linux/pci.h | 13 ++-- 15 files changed, 191 insertions(+), 47 deletions(-) -- 2.27.0.212.ge8ba1cc988-goog ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
RE: [PATCH v3 02/14] iommu: Report domain nesting info
> From: Tian, Kevin > Sent: Tuesday, June 30, 2020 10:01 AM > > > From: Liu, Yi L > > Sent: Monday, June 29, 2020 8:23 PM > > > > Hi Stefan, > > > > > From: Stefan Hajnoczi > > > Sent: Monday, June 29, 2020 5:25 PM > > > > > > On Wed, Jun 24, 2020 at 01:55:15AM -0700, Liu Yi L wrote: > > > > +/* > > > > + * struct iommu_nesting_info - Information for nesting-capable IOMMU. > > > > + * user space should check it before using > > > > + * nesting capability. > > > > + * > > > > + * @size: size of the whole structure > > > > + * @format:PASID table entry format, the same definition with > > > > + * @format of struct iommu_gpasid_bind_data. > > > > + * @features: supported nesting features. > > > > + * @flags: currently reserved for future extension. > > > > + * @data: vendor specific cap info. > > > > + * > > > > + * > > > > +---++ > > > > + * | feature | Notes > > > > | > > > > + * > > > > > +===+=== > > > > > =+ > > > > + * | SYSWIDE_PASID | Kernel manages PASID in system wide, PASIDs > > used | > > > > + * | | in the system should be allocated by host kernel > > > > | > > > > + * > > > > +---++ > > > > + * | BIND_PGTBL| bind page tables to host PASID, the PASID could > > > > | > > > > + * | | either be a host PASID passed in bind request or > > > > | > > > > + * | | default PASIDs (e.g. default PASID of > > > > aux-domain) | > > > > + * > > > > +---++ > > > > + * | CACHE_INVLD | mandatory feature for nesting capable IOMMU > > | > > > > + * > > > > +---++ > > > > > > This feature description is vague about what CACHE_INVLD does and how > > to > > > use it. If I understand correctly, the presence of this feature means > > > that VFIO_IOMMU_NESTING_OP_CACHE_INVLD must be used? > > > > > > The same kind of clarification could be done for SYSWIDE_PASID and > > > BIND_PGTBL too. > > > > For SYSWIDE_PASID and BIND_PGTBL, yes, presence of the feature bit > > means must use. So the two are requirements to user space if it wants > > to setup nesting. 
While for CACHE_INVLD, it's kind of availability > > here. How about removing CACHE_INVLD as presence of BIND_PGTBL should > > indicates support of CACHE_INVLD? > > > > So far this assumption is correct but it may not be true when thinking > forward. > For example, a vendor might find a way to allow the owner of 1st-level page > table to directly invalidate cache w/o going through host IOMMU driver. From > this angle I feel explicitly reporting this capability is more robust. I see. explicitly require 1st-level page table owner to do cache invalidation after modifying page table is fair to me. > Regarding to the description, what about below? > > -- > SYSWIDE_PASID: PASIDs are managed in system-wide, instead of per device. > When a device is assigned to userspace or VM, proper uAPI (provided by > userspace driver framework, e.g. VFIO) must be used to allocate/free PASIDs > for the assigned device. > > BIND_PGTBL: The owner of the first-level/stage-1 page table must explicitly > bind the page table to associated PASID (either the one specified in bind > request or the default PASID of the iommu domain), through VFIO_IOMMU > _NESTING_OP > > CACHE_INVLD: The owner of the first-level/stage-1 page table must > explicitly invalidate the IOMMU cache through VFIO_IOMMU_NESTING_OP, > according to vendor-specific requirement when changing the page table. > -- thanks for the statements, will apply. Regards, Yi Liu > Thanks > Kevin > > ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v2 01/12] ACPI/IORT: Make iort_match_node_callback walk the ACPI namespace for NC
On 2020/6/29 17:05, Lorenzo Pieralisi wrote: On Mon, Jun 29, 2020 at 12:24:43PM +0800, Hanjun Guo wrote: Hi Lorenzo, On 2020/6/19 16:20, Lorenzo Pieralisi wrote: When the iort_match_node_callback is invoked for a named component the match should be executed upon a device with an ACPI companion. For devices with no ACPI companion set-up the ACPI device tree must be walked in order to find the first parent node with a companion set and check the parent node against the named component entry to check whether there is a match and therefore an IORT node describing the in/out ID translation for the device has been found. Signed-off-by: Lorenzo Pieralisi Cc: Will Deacon Cc: Hanjun Guo Cc: Sudeep Holla Cc: Catalin Marinas Cc: Robin Murphy Cc: "Rafael J. Wysocki" --- drivers/acpi/arm64/iort.c | 20 ++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index 28a6b387e80e..5eee81758184 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -264,15 +264,31 @@ static acpi_status iort_match_node_callback(struct acpi_iort_node *node, if (node->type == ACPI_IORT_NODE_NAMED_COMPONENT) { struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL }; - struct acpi_device *adev = to_acpi_device_node(dev->fwnode); + struct acpi_device *adev; struct acpi_iort_named_component *ncomp; + struct device *nc_dev = dev; + + /* +* Walk the device tree to find a device with an +* ACPI companion; there is no point in scanning +* IORT for a device matching a named component if +* the device does not have an ACPI companion to +* start with. +*/ + do { + adev = ACPI_COMPANION(nc_dev); + if (adev) + break; + + nc_dev = nc_dev->parent; + } while (nc_dev); I'm lost here, we need the ACPI_COMPANION (the same as to_acpi_device_node()) of the device, but why do we need to go up to find the parent node? For devices that aren't described in the DSDT - IORT translations are determined by their ACPI parent device. 
Do you see/Have you found any issue with this approach ? The spec says "Describes the IO relationships between devices represented in the ACPI namespace.", and in section 3.1.1.3 Named component node, it says: "Named component nodes are used to describe devices that are also included in the Differentiated System Description Table (DSDT). See [ACPI]." So from my understanding, the IORT spec for now, can only do ID translations for devices in the DSDT. For a platform device, if I use its parent's full path name for its named component entry, then it will match, but this will violate the IORT spec. Can you elaborate on this please I don't get the point you are making. For example, device A is not described in DSDT so can't be represented as a NC node in IORT. Device B can be described in DSDT and it is the parent of device A, so device B can be represented in IORT with memory access properties and node flags with Substream width and Stall supported info. When we are trying to translate device A's ID, we reuse all the memory access properties and node flags from its parent (device B), but will it be the same? So the IORT spec doesn't support this, at least it's pretty vague I think. Thanks Hanjun ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
RE: [PATCH 3/7] iommu/vt-d: Fix PASID devTLB invalidation
> From: Lu Baolu > Sent: Thursday, June 25, 2020 3:26 PM > > On 2020/6/23 23:43, Jacob Pan wrote: > > DevTLB flush can be used for both DMA request with and without PASIDs. > > The former uses PASID#0 (RID2PASID), latter uses non-zero PASID for SVA > > usage. > > > > This patch adds a check for PASID value such that devTLB flush with > > PASID is used for SVA case. This is more efficient in that multiple > > PASIDs can be used by a single device, when tearing down a PASID entry > > we shall flush only the devTLB specific to a PASID. > > > > Fixes: 6f7db75e1c46 ("iommu/vt-d: Add second level page table") btw is it really a fix? From the description it's more like an optimization... > > Signed-off-by: Jacob Pan > > --- > > drivers/iommu/intel/pasid.c | 11 ++- > > 1 file changed, 10 insertions(+), 1 deletion(-) > > > > diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c > > index c81f0f17c6ba..3991a24539a1 100644 > > --- a/drivers/iommu/intel/pasid.c > > +++ b/drivers/iommu/intel/pasid.c > > @@ -486,7 +486,16 @@ devtlb_invalidation_with_pasid(struct > intel_iommu *iommu, > > qdep = info->ats_qdep; > > pfsid = info->pfsid; > > > > - qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - VTD_PAGE_SHIFT); > > + /* > > +* When PASID 0 is used, it indicates RID2PASID(DMA request w/o > PASID), > > +* devTLB flush w/o PASID should be used. For non-zero PASID under > > +* SVA usage, device could do DMA with multiple PASIDs. It is more > > +* efficient to flush devTLB specific to the PASID. > > +*/ > > + if (pasid) > > How about > > if (pasid == PASID_RID2PASID) > qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - > VTD_PAGE_SHIFT); > else > qi_flush_dev_iotlb_pasid(iommu, sid, pfsid, pasid, qdep, 0, > 64 - > VTD_PAGE_SHIFT); > > ? > > It makes the code more readable and still works even we reassign another > pasid for RID2PASID. 
> > Best regards, > baolu > > > + qi_flush_dev_iotlb_pasid(iommu, sid, pfsid, pasid, qdep, 0, > 64 - VTD_PAGE_SHIFT); > > + else > > + qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - > VTD_PAGE_SHIFT); > > } > > > > void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct > device *dev, > > ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
RE: [PATCH v3 1/5] docs: IOMMU user API
> From: Jacob Pan > Sent: Tuesday, June 30, 2020 7:05 AM > > On Fri, 26 Jun 2020 16:19:23 -0600 > Alex Williamson wrote: > > > On Tue, 23 Jun 2020 10:03:53 -0700 > > Jacob Pan wrote: > > > > > IOMMU UAPI is newly introduced to support communications between > > > guest virtual IOMMU and host IOMMU. There has been lots of > > > discussions on how it should work with VFIO UAPI and userspace in > > > general. > > > > > > This document is indended to clarify the UAPI design and usage. The > > > mechenics of how future extensions should be achieved are also > > > covered in this documentation. > > > > > > Signed-off-by: Liu Yi L > > > Signed-off-by: Jacob Pan > > > --- > > > Documentation/userspace-api/iommu.rst | 244 > > > ++ 1 file changed, 244 insertions(+) > > > create mode 100644 Documentation/userspace-api/iommu.rst > > > > > > diff --git a/Documentation/userspace-api/iommu.rst > > > b/Documentation/userspace-api/iommu.rst new file mode 100644 > > > index ..f9e4ed90a413 > > > --- /dev/null > > > +++ b/Documentation/userspace-api/iommu.rst > > > @@ -0,0 +1,244 @@ > > > +.. SPDX-License-Identifier: GPL-2.0 > > > +.. iommu: > > > + > > > += > > > +IOMMU Userspace API > > > += > > > + > > > +IOMMU UAPI is used for virtualization cases where communications > > > are +needed between physical and virtual IOMMU drivers. For native > > > +usage, IOMMU is a system device which does not need to communicate > > > +with user space directly. > > > + > > > +The primary use cases are guest Shared Virtual Address (SVA) and > > > +guest IO virtual address (IOVA), wherein a virtual IOMMU (vIOMMU) > > > is +required to communicate with the physical IOMMU in the host. > > > + > > > +.. contents:: :local: > > > + > > > +Functionalities > > > +=== > > > +Communications of user and kernel involve both directions. The > > > +supported user-kernel APIs are as follows: > > > + > > > +1. Alloc/Free PASID > > > +2. Bind/unbind guest PASID (e.g. Intel VT-d) > > > +3. 
Bind/unbind guest PASID table (e.g. ARM sMMU) > > > +4. Invalidate IOMMU caches > > > +5. Service page requests > > > + > > > +Requirements > > > + > > > +The IOMMU UAPIs are generic and extensible to meet the following > > > +requirements: > > > + > > > +1. Emulated and para-virtualised vIOMMUs > > > +2. Multiple vendors (Intel VT-d, ARM sMMU, etc.) > > > +3. Extensions to the UAPI shall not break existing user space > > > + > > > +Interfaces > > > +== > > > +Although the data structures defined in IOMMU UAPI are > > > self-contained, +there is no user API functions introduced. > > > Instead, IOMMU UAPI is +designed to work with existing user driver > > > frameworks such as VFIO. + > > > +Extension Rules & Precautions > > > +- > > > +When IOMMU UAPI gets extended, the data structures can *only* be > > > +modified in two ways: > > > + > > > +1. Adding new fields by re-purposing the padding[] field. No size > > > change. +2. Adding new union members at the end. May increase in > > > size. + > > > +No new fields can be added *after* the variable sized union in > > > that it +will break backward compatibility when offset moves. In > > > both cases, a +new flag must be accompanied with a new field such > > > that the IOMMU +driver can process the data based on the new flag. > > > Version field is +only reserved for the unlikely event of UAPI > > > upgrade at its entirety. + > > > +It's *always* the caller's responsibility to indicate the size of > > > the +structure passed by setting argsz appropriately. > > > +Though at the same time, argsz is user provided data which is not > > > +trusted. The argsz field allows the user to indicate how much data > > > +they're providing, it's still the kernel's responsibility to > > > validate +whether it's correct and sufficient for the requested > > > operation. 
+ > > > +Compatibility Checking > > > +-- > > > +When IOMMU UAPI extension results in size increase, user such as > > > VFIO +has to handle the following cases: > > > + > > > +1. User and kernel has exact size match > > > +2. An older user with older kernel header (smaller UAPI size) > > > running on a > > > + newer kernel (larger UAPI size) > > > +3. A newer user with newer kernel header (larger UAPI size) running > > > + on an older kernel. > > > +4. A malicious/misbehaving user pass illegal/invalid size but > > > within > > > + range. The data may contain garbage. > > > > What exactly does vfio need to do to handle these? > > > VFIO does nothing other than returning the status from IOMMU driver. > Based on the return status, users such as QEMU can cause fault > conditions within the vIOMMU. But from above description, "user such as VFIO has to handle the following cases"... Thanks Kevin > > > > + > > > +Feature Checking > > > + > > > +While launching
RE: [PATCH v3 02/14] iommu: Report domain nesting info
> From: Liu, Yi L > Sent: Monday, June 29, 2020 8:23 PM > > Hi Stefan, > > > From: Stefan Hajnoczi > > Sent: Monday, June 29, 2020 5:25 PM > > > > On Wed, Jun 24, 2020 at 01:55:15AM -0700, Liu Yi L wrote: > > > +/* > > > + * struct iommu_nesting_info - Information for nesting-capable IOMMU. > > > + * user space should check it before using > > > + * nesting capability. > > > + * > > > + * @size:size of the whole structure > > > + * @format: PASID table entry format, the same definition with > > > + * @format of struct iommu_gpasid_bind_data. > > > + * @features:supported nesting features. > > > + * @flags: currently reserved for future extension. > > > + * @data:vendor specific cap info. > > > + * > > > + * +---++ > > > + * | feature | Notes | > > > + * > > > +===+=== > > > =+ > > > + * | SYSWIDE_PASID | Kernel manages PASID in system wide, PASIDs > used | > > > + * | | in the system should be allocated by host kernel | > > > + * +---++ > > > + * | BIND_PGTBL| bind page tables to host PASID, the PASID could | > > > + * | | either be a host PASID passed in bind request or | > > > + * | | default PASIDs (e.g. default PASID of aux-domain) | > > > + * +---++ > > > + * | CACHE_INVLD | mandatory feature for nesting capable IOMMU > | > > > + * +---++ > > > > This feature description is vague about what CACHE_INVLD does and how > to > > use it. If I understand correctly, the presence of this feature means > > that VFIO_IOMMU_NESTING_OP_CACHE_INVLD must be used? > > > > The same kind of clarification could be done for SYSWIDE_PASID and > > BIND_PGTBL too. > > For SYSWIDE_PASID and BIND_PGTBL, yes, presence of the feature bit > means must use. So the two are requirements to user space if it wants > to setup nesting. While for CACHE_INVLD, it's kind of availability > here. How about removing CACHE_INVLD as presence of BIND_PGTBL should > indicates support of CACHE_INVLD? > So far this assumption is correct but it may not be true when thinking forward. 
For example, a vendor might find a way to allow the owner of 1st-level page table to directly invalidate cache w/o going through host IOMMU driver. From this angle I feel explicitly reporting this capability is more robust. Regarding to the description, what about below? -- SYSWIDE_PASID: PASIDs are managed in system-wide, instead of per device. When a device is assigned to userspace or VM, proper uAPI (provided by userspace driver framework, e.g. VFIO) must be used to allocate/free PASIDs for the assigned device. BIND_PGTBL: The owner of the first-level/stage-1 page table must explicitly bind the page table to associated PASID (either the one specified in bind request or the default PASID of the iommu domain), through VFIO_IOMMU _NESTING_OP CACHE_INVLD: The owner of the first-level/stage-1 page table must explicitly invalidate the IOMMU cache through VFIO_IOMMU_NESTING_OP, according to vendor-specific requirement when changing the page table. -- Thanks Kevin ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
RE: [PATCH] xen: introduce xen_vring_use_dma
> Subject: RE: [PATCH] xen: introduce xen_vring_use_dma > > On Mon, 29 Jun 2020, Peng Fan wrote: > > > > If that is the case, how is it possible that virtio breaks on ARM > > > > using the default dma_ops? The breakage is not Xen related (except > > > > that Xen turns dma_ops on). The original message from Peng was: > > > > > > > > vring_map_one_sg -> vring_use_dma_api > > > >-> dma_map_page > > > >-> __swiotlb_map_page > > > > ->swiotlb_map_page > > > > > > > > ->__dma_map_area(phys_to_virt(dma_to_phys(dev, > > > dev_addr)), size, dir); > > > > However we are using per device dma area for rpmsg, phys_to_virt > > > > could not return a correct virtual address for virtual address in > > > > vmalloc area. Then kernel panic. > > > > > > > > I must be missing something. Maybe it is because it has to do with > RPMesg? > > > > > > I think it's an RPMesg bug, yes > > > > rpmsg bug is another issue, it should not use dma_alloc_coherent for > > reserved area, and use vmalloc_to_page. > > > > Anyway here using dma api will also trigger issue. > > Is the stack trace above for the RPMesg issue or for the Trusty issue? The stack trace you pasted is the rpmsg issue. > If it is the stack trace for RPMesg, can you also post the Trusty stack > trace? Or > are they indentical? There is no stack dump here. It successfully uses swiotlb to do a map, but we actually do not need swiotlb in domU to do the map. Thanks, Peng. ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
RE: [PATCH v3 02/14] iommu: Report domain nesting info
> From: Liu, Yi L > Sent: Saturday, June 27, 2020 2:53 PM > > Hi Robin, > > > From: Robin Murphy > > Sent: Saturday, June 27, 2020 12:05 AM > > > > On 2020-06-26 08:47, Jean-Philippe Brucker wrote: > > > On Wed, Jun 24, 2020 at 01:55:15AM -0700, Liu Yi L wrote: > > >> IOMMUs that support nesting translation needs report the capability > > >> info to userspace, e.g. the format of first level/stage paging > > >> structures. > > >> > > >> This patch reports nesting info by DOMAIN_ATTR_NESTING. Caller can > > >> get nesting info after setting DOMAIN_ATTR_NESTING. > > >> > > >> v2 -> v3: > > >> *) remvoe cap/ecap_mask in iommu_nesting_info. > > >> *) reuse DOMAIN_ATTR_NESTING to get nesting info. > > >> *) return an empty iommu_nesting_info for SMMU drivers per Jean' > > >> suggestion. > > >> > > >> Cc: Kevin Tian > > >> CC: Jacob Pan > > >> Cc: Alex Williamson > > >> Cc: Eric Auger > > >> Cc: Jean-Philippe Brucker > > >> Cc: Joerg Roedel > > >> Cc: Lu Baolu > > >> Signed-off-by: Liu Yi L > > >> Signed-off-by: Jacob Pan > > >> --- > > >> drivers/iommu/arm-smmu-v3.c | 29 -- > > >> drivers/iommu/arm-smmu.c| 29 -- > > > > > > Looks reasonable to me. Please move the SMMU changes to a separate > > > patch and Cc the SMMU maintainers: > > > > Cheers Jean, I'll admit I've been skipping over a lot of these patches > > lately :) > > > > A couple of comments below... 
> > > > > > > > Cc: Will Deacon > > > Cc: Robin Murphy > > > > > > Thanks, > > > Jean > > > > > >> include/uapi/linux/iommu.h | 59 > > + > > >> 3 files changed, 113 insertions(+), 4 deletions(-) > > >> > > >> diff --git a/drivers/iommu/arm-smmu-v3.c > > >> b/drivers/iommu/arm-smmu-v3.c index f578677..0c45d4d 100644 > > >> --- a/drivers/iommu/arm-smmu-v3.c > > >> +++ b/drivers/iommu/arm-smmu-v3.c > > >> @@ -3019,6 +3019,32 @@ static struct iommu_group > > *arm_smmu_device_group(struct device *dev) > > >> return group; > > >> } > > >> > > >> +static int arm_smmu_domain_nesting_info(struct arm_smmu_domain > > *smmu_domain, > > >> +void *data) > > >> +{ > > >> +struct iommu_nesting_info *info = (struct iommu_nesting_info *) > data; > > >> +u32 size; > > >> + > > >> +if (!info || smmu_domain->stage != ARM_SMMU_DOMAIN_NESTED) > > >> +return -ENODEV; > > >> + > > >> +size = sizeof(struct iommu_nesting_info); > > >> + > > >> +/* > > >> + * if provided buffer size is not equal to the size, should > > >> + * return 0 and also the expected buffer size to caller. 
> > >> + */ > > >> +if (info->size != size) { > > >> +info->size = size; > > >> +return 0; > > >> +} > > >> + > > >> +/* report an empty iommu_nesting_info for now */ > > >> +memset(info, 0x0, size); > > >> +info->size = size; > > >> +return 0; > > >> +} > > >> + > > >> static int arm_smmu_domain_get_attr(struct iommu_domain *domain, > > >> enum iommu_attr attr, void *data) > > >> { > > >> @@ -3028,8 +3054,7 @@ static int arm_smmu_domain_get_attr(struct > > iommu_domain *domain, > > >> case IOMMU_DOMAIN_UNMANAGED: > > >> switch (attr) { > > >> case DOMAIN_ATTR_NESTING: > > >> -*(int *)data = (smmu_domain->stage == > > ARM_SMMU_DOMAIN_NESTED); > > >> -return 0; > > >> +return > arm_smmu_domain_nesting_info(smmu_domain, > > data); > > >> default: > > >> return -ENODEV; > > >> } > > >> diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c > > >> index 243bc4c..908607d 100644 > > >> --- a/drivers/iommu/arm-smmu.c > > >> +++ b/drivers/iommu/arm-smmu.c > > >> @@ -1506,6 +1506,32 @@ static struct iommu_group > > *arm_smmu_device_group(struct device *dev) > > >> return group; > > >> } > > >> > > >> +static int arm_smmu_domain_nesting_info(struct arm_smmu_domain > > *smmu_domain, > > >> +void *data) > > >> +{ > > >> +struct iommu_nesting_info *info = (struct iommu_nesting_info *) > data; > > >> +u32 size; > > >> + > > >> +if (!info || smmu_domain->stage != ARM_SMMU_DOMAIN_NESTED) > > >> +return -ENODEV; > > >> + > > >> +size = sizeof(struct iommu_nesting_info); > > >> + > > >> +/* > > >> + * if provided buffer size is not equal to the size, should > > >> + * return 0 and also the expected buffer size to caller. > > >> + */ > > >> +if (info->size != size) { > > >> +info->size = size; > > >> +return 0; > > >> +} > > >> + > > >> +/* report an empty iommu_nesting_info for now */ > > >> +m
Re: [PATCH 1/2] iommu: Add iommu_group_get/set_domain()
Hi Robin, On 6/29/20 7:56 PM, Robin Murphy wrote: On 2020-06-27 04:15, Lu Baolu wrote: The hardware assistant vfio mediated device is a use case of iommu aux-domain. The interactions between vfio/mdev and iommu during mdev creation and passthr are: - Create a group for mdev with iommu_group_alloc(); - Add the device to the group with group = iommu_group_alloc(); if (IS_ERR(group)) return PTR_ERR(group); ret = iommu_group_add_device(group, &mdev->dev); if (!ret) dev_info(&mdev->dev, "MDEV: group_id = %d\n", iommu_group_id(group)); - Allocate an aux-domain iommu_domain_alloc() - Attach the aux-domain to the physical device from which the mdev is created. iommu_aux_attach_device() In the whole process, an iommu group was allocated for the mdev and an iommu domain was attached to the group, but the group->domain leaves NULL. As the result, iommu_get_domain_for_dev() doesn't work anymore. This adds iommu_group_get/set_domain() so that group->domain could be managed whenever a domain is attached or detached through the aux-domain api's. Letting external callers poke around directly in the internals of iommu_group doesn't look right to me. Unfortunately, it seems that the vifo iommu abstraction is deeply bound to the IOMMU subsystem. We can easily find other examples: iommu_group_get/set_iommudata() iommu_group_get/set_name() ... If a regular device is attached to one or more aux domains for PASID use, iommu_get_domain_for_dev() is still going to return the primary domain, so why should it be expected to behave differently for mediated Unlike the normal device attach, we will encounter two devices when it comes to aux-domain. - Parent physical device - this might be, for example, a PCIe device with PASID feature support, hence it is able to tag an unique PASID for DMA transfers originated from its subset. The device driver hence is able to wrapper this subset into an isolated: - Mediated device - a fake device created by the device driver mentioned above. Yes. 
All you mentioned are right for the parent device. But for mediated device, iommu_get_domain_for_dev() doesn't work even it has an valid iommu_group and iommu_domain. iommu_get_domain_for_dev() is a necessary interface for device drivers which want to support aux-domain. For example, struct iommu_domain *domain; struct device *dev = mdev_dev(mdev); unsigned long pasid; domain = iommu_get_domain_for_dev(dev); if (!domain) return -ENODEV; pasid = iommu_aux_get_pasid(domain, dev->parent); if (pasid == IOASID_INVALID) return -EINVAL; /* Program the device context with the PASID value */ Without this fix, iommu_get_domain_for_dev() always returns NULL and the device driver has no means to support aux-domain. Best regards, baolu devices? AFAICS it's perfectly legitimate to have no primary domain if traffic-without-PASID is invalid. Robin. ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v8 3/3] iommu/arm-smmu: Add global/context fault implementation hooks
On Mon, Jun 29, 2020 at 05:10:51PM -0700, Krishna Reddy wrote: > Add global/context fault hooks to allow NVIDIA SMMU implementation > handle faults across multiple SMMUs. > > Signed-off-by: Krishna Reddy Reviewed-by: Nicolin Chen ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v8 1/3] iommu/arm-smmu: add NVIDIA implementation for dual ARM MMU-500 usage
On Mon, Jun 29, 2020 at 05:10:49PM -0700, Krishna Reddy wrote: > NVIDIA's Tegra194 SoC uses two ARM MMU-500s together to interleave > IOVA accesses across them. > Add NVIDIA implementation for dual ARM MMU-500s and add new compatible > string for Tegra194 SoC SMMU topology. > > Signed-off-by: Krishna Reddy Reviewed-by: Nicolin Chen ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v8 3/3] iommu/arm-smmu: Add global/context fault implementation hooks
Add global/context fault hooks to allow NVIDIA SMMU implementation handle faults across multiple SMMUs. Signed-off-by: Krishna Reddy --- drivers/iommu/arm-smmu-nvidia.c | 98 + drivers/iommu/arm-smmu.c| 17 +- drivers/iommu/arm-smmu.h| 3 + 3 files changed, 116 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/arm-smmu-nvidia.c b/drivers/iommu/arm-smmu-nvidia.c index 1124f0ac1823a..c9423b4199c65 100644 --- a/drivers/iommu/arm-smmu-nvidia.c +++ b/drivers/iommu/arm-smmu-nvidia.c @@ -147,6 +147,102 @@ static int nvidia_smmu_reset(struct arm_smmu_device *smmu) return 0; } +static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom) +{ + return container_of(dom, struct arm_smmu_domain, domain); +} + +static irqreturn_t nvidia_smmu_global_fault_inst(int irq, + struct arm_smmu_device *smmu, + int inst) +{ + u32 gfsr, gfsynr0, gfsynr1, gfsynr2; + void __iomem *gr0_base = nvidia_smmu_page(smmu, inst, 0); + + gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR); + if (!gfsr) + return IRQ_NONE; + + gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0); + gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1); + gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2); + + dev_err_ratelimited(smmu->dev, + "Unexpected global fault, this could be serious\n"); + dev_err_ratelimited(smmu->dev, + "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n", + gfsr, gfsynr0, gfsynr1, gfsynr2); + + writel_relaxed(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR); + return IRQ_HANDLED; +} + +static irqreturn_t nvidia_smmu_global_fault(int irq, void *dev) +{ + int inst; + irqreturn_t irq_ret = IRQ_NONE; + struct arm_smmu_device *smmu = dev; + struct nvidia_smmu *nvidia_smmu = to_nvidia_smmu(smmu); + + for (inst = 0; inst < nvidia_smmu->num_inst; inst++) { + irq_ret = nvidia_smmu_global_fault_inst(irq, smmu, inst); + if (irq_ret == IRQ_HANDLED) + return irq_ret; + } + + return irq_ret; +} + +static irqreturn_t nvidia_smmu_context_fault_bank(int irq, + struct 
arm_smmu_device *smmu, + int idx, int inst) +{ + u32 fsr, fsynr, cbfrsynra; + unsigned long iova; + void __iomem *gr1_base = nvidia_smmu_page(smmu, inst, 1); + void __iomem *cb_base = nvidia_smmu_page(smmu, inst, smmu->numpage + idx); + + fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR); + if (!(fsr & ARM_SMMU_FSR_FAULT)) + return IRQ_NONE; + + fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0); + iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR); + cbfrsynra = readl_relaxed(gr1_base + ARM_SMMU_GR1_CBFRSYNRA(idx)); + + dev_err_ratelimited(smmu->dev, + "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n", + fsr, iova, fsynr, cbfrsynra, idx); + + writel_relaxed(fsr, cb_base + ARM_SMMU_CB_FSR); + return IRQ_HANDLED; +} + +static irqreturn_t nvidia_smmu_context_fault(int irq, void *dev) +{ + int inst, idx; + irqreturn_t irq_ret = IRQ_NONE; + struct iommu_domain *domain = dev; + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + struct arm_smmu_device *smmu = smmu_domain->smmu; + + for (inst = 0; inst < to_nvidia_smmu(smmu)->num_inst; inst++) { + /* +* Interrupt line shared between all context faults. +* Check for faults across all contexts. 
+*/ + for (idx = 0; idx < smmu->num_context_banks; idx++) { + irq_ret = nvidia_smmu_context_fault_bank(irq, smmu, +idx, inst); + + if (irq_ret == IRQ_HANDLED) + return irq_ret; + } + } + + return irq_ret; +} + static const struct arm_smmu_impl nvidia_smmu_impl = { .read_reg = nvidia_smmu_read_reg, .write_reg = nvidia_smmu_write_reg, @@ -154,6 +250,8 @@ static const struct arm_smmu_impl nvidia_smmu_impl = { .write_reg64 = nvidia_smmu_write_reg64, .reset = nvidia_smmu_reset, .tlb_sync = nvidia_smmu_tlb_sync, + .global_fault = nvidia_smmu_global_fault, + .context_fault = nvidia_smmu_context_fault, }; struct arm_smmu_device *nvidia_smmu_impl_init(struct arm_smmu_device *smmu) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 243bc4cb2705b..3bb0aba15a356 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -673,6 +673,7 @@ static int arm_smmu_init_
[PATCH v8 2/3] dt-bindings: arm-smmu: Add binding for Tegra194 SMMU
Add binding for NVIDIA's Tegra194 SoC SMMU topology that is based on ARM MMU-500. Signed-off-by: Krishna Reddy --- Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 5 + 1 file changed, 5 insertions(+) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index d7ceb4c34423b..5b2586ac715ed 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -38,6 +38,11 @@ properties: - qcom,sc7180-smmu-500 - qcom,sdm845-smmu-500 - const: arm,mmu-500 + - description: NVIDIA SoCs that use more than one "arm,mmu-500" +items: + - enum: + - nvidia,tegra194-smmu + - const: arm,mmu-500 - items: - const: arm,mmu-500 - const: arm,smmu-v2 -- 2.26.2 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v8 0/3] Nvidia Arm SMMUv2 Implementation
Changes in v8: Fixed incorrect CB_FSR read issue during context bank fault. Rebased and validated patches on top of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git next v7 - https://lkml.org/lkml/2020/6/28/347 v6 - https://lkml.org/lkml/2020/6/4/1018 v5 - https://lkml.org/lkml/2020/5/21/1114 v4 - https://lkml.org/lkml/2019/10/30/1054 v3 - https://lkml.org/lkml/2019/10/18/1601 v2 - https://lkml.org/lkml/2019/9/2/980 v1 - https://lkml.org/lkml/2019/8/29/1588 Krishna Reddy (3): iommu/arm-smmu: add NVIDIA implementation for dual ARM MMU-500 usage dt-bindings: arm-smmu: Add binding for Tegra194 SMMU iommu/arm-smmu: Add global/context fault implementation hooks .../devicetree/bindings/iommu/arm,smmu.yaml | 5 + MAINTAINERS | 2 + drivers/iommu/Makefile| 2 +- drivers/iommu/arm-smmu-impl.c | 3 + drivers/iommu/arm-smmu-nvidia.c | 294 ++ drivers/iommu/arm-smmu.c | 17 +- drivers/iommu/arm-smmu.h | 4 + 7 files changed, 324 insertions(+), 3 deletions(-) create mode 100644 drivers/iommu/arm-smmu-nvidia.c base-commit: 48f0bcfb7aad2c6eb4c1e66476b58475aa14393e -- 2.26.2 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v8 1/3] iommu/arm-smmu: add NVIDIA implementation for dual ARM MMU-500 usage
NVIDIA's Tegra194 SoC uses two ARM MMU-500s together to interleave IOVA accesses across them. Add NVIDIA implementation for dual ARM MMU-500s and add new compatible string for Tegra194 SoC SMMU topology. Signed-off-by: Krishna Reddy --- MAINTAINERS | 2 + drivers/iommu/Makefile | 2 +- drivers/iommu/arm-smmu-impl.c | 3 + drivers/iommu/arm-smmu-nvidia.c | 196 drivers/iommu/arm-smmu.h| 1 + 5 files changed, 203 insertions(+), 1 deletion(-) create mode 100644 drivers/iommu/arm-smmu-nvidia.c diff --git a/MAINTAINERS b/MAINTAINERS index 7b5ffd646c6b9..64c37dbdd4426 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16808,8 +16808,10 @@ F: drivers/i2c/busses/i2c-tegra.c TEGRA IOMMU DRIVERS M: Thierry Reding +R: Krishna Reddy L: linux-te...@vger.kernel.org S: Supported +F: drivers/iommu/arm-smmu-nvidia.c F: drivers/iommu/tegra* TEGRA KBC DRIVER diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index 342190196dfb0..2b8203db73ec3 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -15,7 +15,7 @@ obj-$(CONFIG_AMD_IOMMU) += amd/iommu.o amd/init.o amd/quirks.o obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += amd/debugfs.o obj-$(CONFIG_AMD_IOMMU_V2) += amd/iommu_v2.o obj-$(CONFIG_ARM_SMMU) += arm_smmu.o -arm_smmu-objs += arm-smmu.o arm-smmu-impl.o arm-smmu-qcom.o +arm_smmu-objs += arm-smmu.o arm-smmu-impl.o arm-smmu-nvidia.o arm-smmu-qcom.o obj-$(CONFIG_ARM_SMMU_V3) += arm-smmu-v3.o obj-$(CONFIG_DMAR_TABLE) += intel/dmar.o obj-$(CONFIG_INTEL_IOMMU) += intel/iommu.o intel/pasid.o diff --git a/drivers/iommu/arm-smmu-impl.c b/drivers/iommu/arm-smmu-impl.c index c75b9d957b702..70f7318017617 100644 --- a/drivers/iommu/arm-smmu-impl.c +++ b/drivers/iommu/arm-smmu-impl.c @@ -171,6 +171,9 @@ struct arm_smmu_device *arm_smmu_impl_init(struct arm_smmu_device *smmu) if (of_property_read_bool(np, "calxeda,smmu-secure-config-access")) smmu->impl = &calxeda_impl; + if (of_device_is_compatible(smmu->dev->of_node, "nvidia,tegra194-smmu")) + return nvidia_smmu_impl_init(smmu); + if 
(of_device_is_compatible(np, "qcom,sdm845-smmu-500") || of_device_is_compatible(np, "qcom,sc7180-smmu-500")) return qcom_smmu_impl_init(smmu); diff --git a/drivers/iommu/arm-smmu-nvidia.c b/drivers/iommu/arm-smmu-nvidia.c new file mode 100644 index 0..1124f0ac1823a --- /dev/null +++ b/drivers/iommu/arm-smmu-nvidia.c @@ -0,0 +1,196 @@ +// SPDX-License-Identifier: GPL-2.0-only +// NVIDIA ARM SMMU v2 implementation quirks +// Copyright (C) 2019-2020 NVIDIA CORPORATION. All rights reserved. + +#include +#include +#include +#include +#include + +#include "arm-smmu.h" + +/* + * Tegra194 has three ARM MMU-500 Instances. + * Two of them are used together for interleaved IOVA accesses and + * used by non-isochronous HW devices for SMMU translations. + * Third one is used for SMMU translations from isochronous HW devices. + * It is possible to use this implementation to program either + * all three or two of the instances identically as desired through + * DT node. + * + * Programming all the three instances identically comes with redundant TLB + * invalidations as all three never need to be TLB invalidated for a HW device. + * + * When Linux kernel supports multiple SMMU devices, the SMMU device used for + * isochornous HW devices should be added as a separate ARM MMU-500 device + * in DT and be programmed independently for efficient TLB invalidates. + */ +#define MAX_SMMU_INSTANCES 3 + +#define TLB_LOOP_TIMEOUT_IN_US 100 /* 1s! 
*/ +#define TLB_SPIN_COUNT 10 + +struct nvidia_smmu { + struct arm_smmu_device smmu; + unsigned intnum_inst; + void __iomem*bases[MAX_SMMU_INSTANCES]; +}; + +static inline struct nvidia_smmu *to_nvidia_smmu(struct arm_smmu_device *smmu) +{ + return container_of(smmu, struct nvidia_smmu, smmu); +} + +static inline void __iomem *nvidia_smmu_page(struct arm_smmu_device *smmu, + unsigned int inst, int page) +{ + struct nvidia_smmu *nvidia_smmu = to_nvidia_smmu(smmu); + + if (!nvidia_smmu->bases[0]) + nvidia_smmu->bases[0] = smmu->base; + + return nvidia_smmu->bases[inst] + (page << smmu->pgshift); +} + +static u32 nvidia_smmu_read_reg(struct arm_smmu_device *smmu, + int page, int offset) +{ + void __iomem *reg = nvidia_smmu_page(smmu, 0, page) + offset; + + return readl_relaxed(reg); +} + +static void nvidia_smmu_write_reg(struct arm_smmu_device *smmu, + int page, int offset, u32 val) +{ + unsigned int i; + struct nvidia_smmu *nvidia_smmu = to_nvidia_smmu(smmu); + + for (i = 0; i < nvidia_smmu->num_inst; i++) { + void __iomem *reg = nvidia_smmu_page(smmu, i, page) + offset; +
Re: [PATCH v7 1/3] iommu/arm-smmu: add NVIDIA implementation for dual ARM MMU-500 usage
On Mon, Jun 29, 2020 at 10:49:31PM +, Krishna Reddy wrote: > >> + if (!nvidia_smmu->bases[0]) > >> + nvidia_smmu->bases[0] = smmu->base; > >> + > >> + return nvidia_smmu->bases[inst] + (page << smmu->pgshift); } > > >Not critical -- just a nit: why not put the bases[0] in init()? > > smmu->base is not available during nvidia_smmu_impl_init() call. It is set > afterwards in arm-smmu.c. > It can't be avoided without changing the devm_ioremap() and impl_init() call > order in arm-smmu.c. I see...just checked arm_ssmu_impl_init(). ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
RE: [PATCH] xen: introduce xen_vring_use_dma
On Mon, 29 Jun 2020, Peng Fan wrote: > > > If that is the case, how is it possible that virtio breaks on ARM > > > using the default dma_ops? The breakage is not Xen related (except > > > that Xen turns dma_ops on). The original message from Peng was: > > > > > > vring_map_one_sg -> vring_use_dma_api > > >-> dma_map_page > > > -> __swiotlb_map_page > > > ->swiotlb_map_page > > > > > > ->__dma_map_area(phys_to_virt(dma_to_phys(dev, > > dev_addr)), size, dir); > > > However we are using per device dma area for rpmsg, phys_to_virt > > > could not return a correct virtual address for virtual address in > > > vmalloc area. Then kernel panic. > > > > > > I must be missing something. Maybe it is because it has to do with RPMesg? > > > > I think it's an RPMesg bug, yes > > rpmsg bug is another issue, it should not use dma_alloc_coherent for reserved > area, > and use vmalloc_to_page. > > Anyway here using dma api will also trigger issue. Is the stack trace above for the RPMesg issue or for the Trusty issue? If it is the stack trace for RPMesg, can you also post the Trusty stack trace? Or are they identical? ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH] xen: introduce xen_vring_use_dma
On Fri, 26 Jun 2020, Michael S. Tsirkin wrote: > On Thu, Jun 25, 2020 at 10:31:27AM -0700, Stefano Stabellini wrote: > > On Wed, 24 Jun 2020, Michael S. Tsirkin wrote: > > > On Wed, Jun 24, 2020 at 02:53:54PM -0700, Stefano Stabellini wrote: > > > > On Wed, 24 Jun 2020, Michael S. Tsirkin wrote: > > > > > On Wed, Jun 24, 2020 at 10:59:47AM -0700, Stefano Stabellini wrote: > > > > > > On Wed, 24 Jun 2020, Michael S. Tsirkin wrote: > > > > > > > On Wed, Jun 24, 2020 at 05:17:32PM +0800, Peng Fan wrote: > > > > > > > > Export xen_swiotlb for all platforms using xen swiotlb > > > > > > > > > > > > > > > > Use xen_swiotlb to determine when vring should use dma APIs to > > > > > > > > map the > > > > > > > > ring: when xen_swiotlb is enabled the dma API is required. When > > > > > > > > it is > > > > > > > > disabled, it is not required. > > > > > > > > > > > > > > > > Signed-off-by: Peng Fan > > > > > > > > > > > > > > Isn't there some way to use VIRTIO_F_IOMMU_PLATFORM for this? > > > > > > > Xen was there first, but everyone else is using that now. > > > > > > > > > > > > Unfortunately it is complicated and it is not related to > > > > > > VIRTIO_F_IOMMU_PLATFORM :-( > > > > > > > > > > > > > > > > > > The Xen subsystem in Linux uses dma_ops via swiotlb_xen to translate > > > > > > foreign mappings (memory coming from other VMs) to physical > > > > > > addresses. > > > > > > On x86, it also uses dma_ops to translate Linux's idea of a physical > > > > > > address into a real physical address (this is unneeded on ARM.) > > > > > > > > > > > > > > > > > > So regardless of VIRTIO_F_IOMMU_PLATFORM, dma_ops should be used on > > > > > > Xen/x86 > > > > > > always and on Xen/ARM if Linux is Dom0 (because it has foreign > > > > > > mappings.) That is why we have the if (xen_domain) return true; in > > > > > > vring_use_dma_api. > > > > > > > > > > VIRTIO_F_IOMMU_PLATFORM makes guest always use DMA ops. 
> > > > > > > > > > Xen hack predates VIRTIO_F_IOMMU_PLATFORM so it *also* > > > > > forces DMA ops even if VIRTIO_F_IOMMU_PLATFORM is clear. > > > > > > > > > > Unfortunately as a result Xen never got around to > > > > > properly setting VIRTIO_F_IOMMU_PLATFORM. > > > > > > > > I don't think VIRTIO_F_IOMMU_PLATFORM would be correct for this because > > > > the usage of swiotlb_xen is not a property of virtio, > > > > > > > > > Basically any device without VIRTIO_F_ACCESS_PLATFORM > > > (that is it's name in latest virtio spec, VIRTIO_F_IOMMU_PLATFORM is > > > what linux calls it) is declared as "special, don't follow normal rules > > > for access". > > > > > > So yes swiotlb_xen is not a property of virtio, but what *is* a property > > > of virtio is that it's not special, just a regular device from DMA POV. > > > > I am trying to understand what you meant but I think I am missing > > something. > > > > Are you saying that modern virtio should always have > > VIRTIO_F_ACCESS_PLATFORM, hence use normal dma_ops as any other devices? > > I am saying it's a safe default. Clear VIRTIO_F_ACCESS_PLATFORM if you > have some special needs e.g. you are very sure it's ok to bypass DMA > ops, or you need to support a legacy guest (produced in the window > between virtio 1 support in 2014 and support for > VIRTIO_F_ACCESS_PLATFORM in 2016). Ok thanks. I understand and it makes sense to me. > > > > > > You might have noticed that I missed one possible case above: > > > > > > Xen/ARM > > > > > > DomU :-) > > > > > > > > > > > > Xen/ARM domUs don't need swiotlb_xen, it is not even initialized. > > > > > > So if > > > > > > (xen_domain) return true; would give the wrong answer in that case. > > > > > > Linux would end up calling the "normal" dma_ops, not swiotlb-xen, > > > > > > and > > > > > > the "normal" dma_ops fail. 
> > > > > > > > > > > > > > > > > > The solution I suggested was to make the check in vring_use_dma_api > > > > > > more > > > > > > flexible by returning true if the swiotlb_xen is supposed to be > > > > > > used, > > > > > > not in general for all Xen domains, because that is what the check > > > > > > was > > > > > > really meant to do. > > > > > > > > > > Why not fix DMA ops so they DTRT (nop) on Xen/ARM DomU? What is wrong > > > > > with that? > > > > > > > > swiotlb-xen is not used on Xen/ARM DomU, the default dma_ops are the > > > > ones that are used. So you are saying, why don't we fix the default > > > > dma_ops to work with virtio? > > > > > > > > It is bad that the default dma_ops crash with virtio, so yes I think it > > > > would be good to fix that. However, even if we fixed that, the if > > > > (xen_domain()) check in vring_use_dma_api is still a problem. > > > > > > Why is it a problem? It just makes virtio use DMA API. > > > If that in turn works, problem solved. > > > > You are correct in the sense that it would work. However I do think it > > is wrong for vring_use_dma_api to enable dma_ops/swiotlb-xen for Xen/ARM > > DomUs that don't need it. There are many
RE: [PATCH v7 3/3] iommu/arm-smmu: Add global/context fault implementation hooks
>> +static irqreturn_t nvidia_smmu_context_fault_bank(int irq, >> + void __iomem *cb_base = nvidia_smmu_page(smmu, inst, >> + smmu->numpage + idx); [...] >> + fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR); [...] >> + writel_relaxed(fsr, cb_base + ARM_SMMU_CB_FSR); >It reads FSR of the default inst (1st), but clears the FSR of corresponding >inst -- just want to make sure that this is okay and intended. FSR should be read from corresponding inst. Not from instance 0. Let me post updated patch. -KR ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v3 1/5] docs: IOMMU user API
On Fri, 26 Jun 2020 16:19:23 -0600 Alex Williamson wrote: > On Tue, 23 Jun 2020 10:03:53 -0700 > Jacob Pan wrote: > > > IOMMU UAPI is newly introduced to support communications between > > guest virtual IOMMU and host IOMMU. There has been lots of > > discussions on how it should work with VFIO UAPI and userspace in > > general. > > > > This document is intended to clarify the UAPI design and usage. The > > mechanics of how future extensions should be achieved are also > > covered in this documentation. > > > > Signed-off-by: Liu Yi L > > Signed-off-by: Jacob Pan > > --- > > Documentation/userspace-api/iommu.rst | 244 > > ++ 1 file changed, 244 insertions(+) > > create mode 100644 Documentation/userspace-api/iommu.rst > > > > diff --git a/Documentation/userspace-api/iommu.rst > > b/Documentation/userspace-api/iommu.rst new file mode 100644 > > index ..f9e4ed90a413 > > --- /dev/null > > +++ b/Documentation/userspace-api/iommu.rst > > @@ -0,0 +1,244 @@ > > +.. SPDX-License-Identifier: GPL-2.0 > > +.. iommu: > > + > > += > > +IOMMU Userspace API > > += > > + > > +IOMMU UAPI is used for virtualization cases where communications > > are +needed between physical and virtual IOMMU drivers. For native > > +usage, IOMMU is a system device which does not need to communicate > > +with user space directly. > > + > > +The primary use cases are guest Shared Virtual Address (SVA) and > > +guest IO virtual address (IOVA), wherein a virtual IOMMU (vIOMMU) > > is +required to communicate with the physical IOMMU in the host. > > + > > +.. contents:: :local: > > + > > +Functionalities > > +=== > > +Communications of user and kernel involve both directions. The > > +supported user-kernel APIs are as follows: > > + > > +1. Alloc/Free PASID > > +2. Bind/unbind guest PASID (e.g. Intel VT-d) > > +3. Bind/unbind guest PASID table (e.g. ARM sMMU) > > +4. Invalidate IOMMU caches > > +5. 
Service page requests > > + > > +Requirements > > + > > +The IOMMU UAPIs are generic and extensible to meet the following > > +requirements: > > + > > +1. Emulated and para-virtualised vIOMMUs > > +2. Multiple vendors (Intel VT-d, ARM sMMU, etc.) > > +3. Extensions to the UAPI shall not break existing user space > > + > > +Interfaces > > +== > > +Although the data structures defined in IOMMU UAPI are > > self-contained, +there is no user API functions introduced. > > Instead, IOMMU UAPI is +designed to work with existing user driver > > frameworks such as VFIO. + > > +Extension Rules & Precautions > > +- > > +When IOMMU UAPI gets extended, the data structures can *only* be > > +modified in two ways: > > + > > +1. Adding new fields by re-purposing the padding[] field. No size > > change. +2. Adding new union members at the end. May increase in > > size. + > > +No new fields can be added *after* the variable sized union in > > that it +will break backward compatibility when offset moves. In > > both cases, a +new flag must be accompanied with a new field such > > that the IOMMU +driver can process the data based on the new flag. > > Version field is +only reserved for the unlikely event of UAPI > > upgrade at its entirety. + > > +It's *always* the caller's responsibility to indicate the size of > > the +structure passed by setting argsz appropriately. > > +Though at the same time, argsz is user provided data which is not > > +trusted. The argsz field allows the user to indicate how much data > > +they're providing, it's still the kernel's responsibility to > > validate +whether it's correct and sufficient for the requested > > operation. + > > +Compatibility Checking > > +-- > > +When IOMMU UAPI extension results in size increase, user such as > > VFIO +has to handle the following cases: > > + > > +1. User and kernel has exact size match > > +2. An older user with older kernel header (smaller UAPI size) > > running on a > > + newer kernel (larger UAPI size) > > +3. 
A newer user with newer kernel header (larger UAPI size) running > > + on an older kernel. > > +4. A malicious/misbehaving user pass illegal/invalid size but > > within > > + range. The data may contain garbage. > > What exactly does vfio need to do to handle these? > VFIO does nothing other than returning the status from IOMMU driver. Based on the return status, users such as QEMU can cause fault conditions within the vIOMMU. > > + > > +Feature Checking > > + > > +While launching a guest with vIOMMU, it is important to ensure > > that host +can support the UAPI data structures to be used for > > vIOMMU-pIOMMU +communications. Without upfront compatibility > > checking, future faults +are difficult to report even in normal > > conditions. For example, TLB +invalidations should always succeed. > > There is no architectural way to +report back to the vIOMMU if the
RE: [PATCH v7 1/3] iommu/arm-smmu: add NVIDIA implementation for dual ARM MMU-500 usage
>> + if (!nvidia_smmu->bases[0]) >> + nvidia_smmu->bases[0] = smmu->base; >> + >> + return nvidia_smmu->bases[inst] + (page << smmu->pgshift); } >Not critical -- just a nit: why not put the bases[0] in init()? smmu->base is not available during nvidia_smmu_impl_init() call. It is set afterwards in arm-smmu.c. It can't be avoided without changing the devm_ioremap() and impl_init() call order in arm-smmu.c. -KR ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v7 3/3] iommu/arm-smmu: Add global/context fault implementation hooks
On Sun, Jun 28, 2020 at 07:28:38PM -0700, Krishna Reddy wrote: > Add global/context fault hooks to allow NVIDIA SMMU implementation > handle faults across multiple SMMUs. > > Signed-off-by: Krishna Reddy > +static irqreturn_t nvidia_smmu_global_fault_inst(int irq, > +struct arm_smmu_device *smmu, > +int inst) > +{ > + u32 gfsr, gfsynr0, gfsynr1, gfsynr2; > + void __iomem *gr0_base = nvidia_smmu_page(smmu, inst, 0); > + > + gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR); > + gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0); > + gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1); > + gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2); > + > + if (!gfsr) > + return IRQ_NONE; Could move this before gfsynr readings to save some readl() for !gfsr cases? > +static irqreturn_t nvidia_smmu_context_fault_bank(int irq, > + void __iomem *cb_base = nvidia_smmu_page(smmu, inst, smmu->numpage + > idx); [...] > + fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR); [...] > + writel_relaxed(fsr, cb_base + ARM_SMMU_CB_FSR); It reads FSR of the default inst (1st), but clears the FSR of corresponding inst -- just want to make sure that this is okay and intended. > @@ -185,7 +283,8 @@ struct arm_smmu_device *nvidia_smmu_impl_init(struct > arm_smmu_device *smmu) > } > > nvidia_smmu->smmu.impl = &nvidia_smmu_impl; > - /* Free the arm_smmu_device struct allocated in arm-smmu.c. > + /* > + * Free the arm_smmu_device struct allocated in arm-smmu.c. >* Once this function returns, arm-smmu.c would use arm_smmu_device >* allocated as part of nvidia_smmu struct. >*/ Hmm, this coding style fix should be probably squashed into PATCH-1? ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v7 1/3] iommu/arm-smmu: add NVIDIA implementation for dual ARM MMU-500 usage
On Sun, Jun 28, 2020 at 07:28:36PM -0700, Krishna Reddy wrote: > NVIDIA's Tegra194 SoC uses two ARM MMU-500s together to interleave > IOVA accesses across them. > Add NVIDIA implementation for dual ARM MMU-500s and add new compatible > string for Tegra194 SoC SMMU topology. > > Signed-off-by: Krishna Reddy > +static inline void __iomem *nvidia_smmu_page(struct arm_smmu_device *smmu, > +unsigned int inst, int page) > +{ > + struct nvidia_smmu *nvidia_smmu = to_nvidia_smmu(smmu); > + > + if (!nvidia_smmu->bases[0]) > + nvidia_smmu->bases[0] = smmu->base; > + > + return nvidia_smmu->bases[inst] + (page << smmu->pgshift); > +} Not critical -- just a nit: why not put the bases[0] in init()? Everything else looks good to me: Reviewed-by: Nicolin Chen ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH] iommu: SUN50I_IOMMU should depend on HAS_DMA
On 2020-06-29 13:11, Geert Uytterhoeven wrote: If NO_DMA=y (e.g. Sun-3 all{mod,yes}-config): drivers/iommu/dma-iommu.o: In function `iommu_dma_mmap': dma-iommu.c:(.text+0x92e): undefined reference to `dma_pgprot' IOMMU_DMA must not be selected, unless HAS_DMA=y. Wait, no, IOMMU_DMA should not be selected by drivers at all - it's for arch code to choose. x86 just complicates matters with some of its arch code being in its IOMMU drivers... Robin. Hence fix this by making SUN50I_IOMMU depend on HAS_DMA. Fixes: 4100b8c229b32835 ("iommu: Add Allwinner H6 IOMMU driver") Signed-off-by: Geert Uytterhoeven --- drivers/iommu/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 6dc49ed8377a5c12..b0f308cb7f7c2fc2 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -305,6 +305,7 @@ config ROCKCHIP_IOMMU config SUN50I_IOMMU bool "Allwinner H6 IOMMU Support" + depends on HAS_DMA depends on ARCH_SUNXI || COMPILE_TEST select ARM_DMA_USE_IOMMU select IOMMU_API ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH net] xsk: remove cheap_dma optimization
On 2020-06-29 17:18, Daniel Borkmann wrote: Nice, tossed from bpf tree then! (Looks like it didn't land on the bpf list yet, but seems other mails are currently stuck as well on vger. I presume it will be routed to Linus via Christoph?) Thanks! Christoph (according to the other mail) was OK taking the series via your bpf tree, Dave's net tree, or his dma-mapping tree. Björn ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCHv3 3/7] iommu/arm-smmu: Add domain attribute for system cache
Add iommu domain attribute for using system cache aka last level cache by client drivers like GPU to set right attributes for caching the hardware pagetables into the system cache. Signed-off-by: Sai Prakash Ranjan --- drivers/iommu/arm-smmu.c | 17 + drivers/iommu/arm-smmu.h | 1 + include/linux/iommu.h| 1 + 3 files changed, 19 insertions(+) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index b2564f93d685..71b6f7038423 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -897,6 +897,9 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, goto out_unlock; } + if (smmu_domain->sys_cache) + pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_SYS_CACHE; + if (smmu_domain->non_strict) pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT; @@ -1732,6 +1735,9 @@ static int arm_smmu_domain_get_attr(struct iommu_domain *domain, case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE: *(int *)data = smmu_domain->non_strict; return 0; + case DOMAIN_ATTR_SYS_CACHE: + *((int *)data) = smmu_domain->sys_cache; + return 0; default: return -ENODEV; } @@ -1763,6 +1769,17 @@ static int arm_smmu_domain_set_attr(struct iommu_domain *domain, else smmu_domain->stage = ARM_SMMU_DOMAIN_S1; break; + case DOMAIN_ATTR_SYS_CACHE: + if (smmu_domain->smmu) { + ret = -EPERM; + goto out_unlock; + } + + if (*((int *)data)) + smmu_domain->sys_cache = true; + else + smmu_domain->sys_cache = false; + break; default: ret = -ENODEV; } diff --git a/drivers/iommu/arm-smmu.h b/drivers/iommu/arm-smmu.h index 4a335ef3d97a..bbae48bdc022 100644 --- a/drivers/iommu/arm-smmu.h +++ b/drivers/iommu/arm-smmu.h @@ -348,6 +348,7 @@ struct arm_smmu_domain { struct iommu_domain domain; struct device *dev; /* Device attached to this domain */ boolaux; + boolsys_cache; }; struct arm_smmu_cb { diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 2388117641f1..a48edbebe3cb 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -125,6 +125,7 @@ enum iommu_attr { DOMAIN_ATTR_NESTING,/* 
two stages of translation */ DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, DOMAIN_ATTR_PGTABLE_CFG, + DOMAIN_ATTR_SYS_CACHE, DOMAIN_ATTR_MAX, }; -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCHv3 7/7] drm/msm/a6xx: Add support for using system cache(LLC)
From: Sharat Masetty The last level system cache can be partitioned to 32 different slices of which GPU has two slices preallocated. One slice is used for caching GPU buffers and the other slice is used for caching the GPU SMMU pagetables. This talks to the core system cache driver to acquire the slice handles, configure the SCID's to those slices and activates and deactivates the slices upon GPU power collapse and restore. Some support from the IOMMU driver is also needed to make use of the system cache. IOMMU_SYS_CACHE_ONLY is a buffer protection flag which enables caching GPU data buffers in the system cache with memory attributes such as outer cacheable, read-allocate, write-allocate for buffers. The GPU then has the ability to override a few cacheability parameters which it does to override write-allocate to write-no-allocate as the GPU hardware does not benefit much from it. Similarly DOMAIN_ATTR_SYS_CACHE is another domain level attribute used by the IOMMU driver to set the right attributes to cache the hardware pagetables into the system cache. 
Signed-off-by: Sharat Masetty (sai: fix to set attr before device attach to IOMMU and rebase) Signed-off-by: Sai Prakash Ranjan --- drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 82 + drivers/gpu/drm/msm/adreno/a6xx_gpu.h | 3 + drivers/gpu/drm/msm/adreno/adreno_gpu.c | 23 ++- drivers/gpu/drm/msm/msm_iommu.c | 3 + drivers/gpu/drm/msm/msm_mmu.h | 4 ++ 5 files changed, 114 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 6bee70853ea8..c33cd2a588e6 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -9,6 +9,8 @@ #include "a6xx_gmu.xml.h" #include +#include +#include #define GPU_PAS_ID 13 @@ -808,6 +810,79 @@ static const u32 a6xx_register_offsets[REG_ADRENO_REGISTER_MAX] = { REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A6XX_CP_RB_CNTL), }; +static void a6xx_llc_rmw(struct a6xx_gpu *a6xx_gpu, u32 reg, u32 mask, u32 or) +{ + return msm_rmw(a6xx_gpu->llc_mmio + (reg << 2), mask, or); +} + +static void a6xx_llc_write(struct a6xx_gpu *a6xx_gpu, u32 reg, u32 value) +{ + return msm_writel(value, a6xx_gpu->llc_mmio + (reg << 2)); +} + +static void a6xx_llc_deactivate(struct a6xx_gpu *a6xx_gpu) +{ + llcc_slice_deactivate(a6xx_gpu->llc_slice); + llcc_slice_deactivate(a6xx_gpu->htw_llc_slice); +} + +static void a6xx_llc_activate(struct a6xx_gpu *a6xx_gpu) +{ + u32 cntl1_regval = 0; + + if (IS_ERR(a6xx_gpu->llc_mmio)) + return; + + if (!llcc_slice_activate(a6xx_gpu->llc_slice)) { + u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice); + + gpu_scid &= 0x1f; + cntl1_regval = (gpu_scid << 0) | (gpu_scid << 5) | (gpu_scid << 10) | + (gpu_scid << 15) | (gpu_scid << 20); + } + + if (!llcc_slice_activate(a6xx_gpu->htw_llc_slice)) { + u32 gpuhtw_scid = llcc_get_slice_id(a6xx_gpu->htw_llc_slice); + + gpuhtw_scid &= 0x1f; + cntl1_regval |= FIELD_PREP(GENMASK(29, 25), gpuhtw_scid); + } + + if (cntl1_regval) { + /* +* Program the slice IDs for the various GPU blocks and GPU MMU 
+* pagetables +*/ + a6xx_llc_write(a6xx_gpu, REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1, cntl1_regval); + + /* +* Program cacheability overrides to not allocate cache lines on +* a write miss +*/ + a6xx_llc_rmw(a6xx_gpu, REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_0, 0xF, 0x03); + } +} + +static void a6xx_llc_slices_destroy(struct a6xx_gpu *a6xx_gpu) +{ + llcc_slice_putd(a6xx_gpu->llc_slice); + llcc_slice_putd(a6xx_gpu->htw_llc_slice); +} + +static void a6xx_llc_slices_init(struct platform_device *pdev, + struct a6xx_gpu *a6xx_gpu) +{ + a6xx_gpu->llc_mmio = msm_ioremap(pdev, "cx_mem", "gpu_cx"); + if (IS_ERR(a6xx_gpu->llc_mmio)) + return; + + a6xx_gpu->llc_slice = llcc_slice_getd(LLCC_GPU); + a6xx_gpu->htw_llc_slice = llcc_slice_getd(LLCC_GPUHTW); + + if (IS_ERR(a6xx_gpu->llc_slice) && IS_ERR(a6xx_gpu->htw_llc_slice)) + a6xx_gpu->llc_mmio = ERR_PTR(-EINVAL); +} + static int a6xx_pm_resume(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); @@ -822,6 +897,8 @@ static int a6xx_pm_resume(struct msm_gpu *gpu) msm_gpu_resume_devfreq(gpu); + a6xx_llc_activate(a6xx_gpu); + return 0; } @@ -830,6 +907,8 @@ static int a6xx_pm_suspend(struct msm_gpu *gpu) struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + a6xx_llc_deactivate(a6xx_gpu); + devfreq_suspend_de
[PATCHv3 5/7] iommu: arm-smmu-impl: Convert to use of_match_node() for qcom impl
Use of_match_node() to match qcom implementation instead of multiple of_device_compatible() calls for each qcom implementation. Signed-off-by: Sai Prakash Ranjan --- drivers/iommu/arm-smmu-impl.c | 9 +++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/arm-smmu-impl.c b/drivers/iommu/arm-smmu-impl.c index 8fbab9c38b61..42020d50ce12 100644 --- a/drivers/iommu/arm-smmu-impl.c +++ b/drivers/iommu/arm-smmu-impl.c @@ -9,6 +9,12 @@ #include "arm-smmu.h" +static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = { + { .compatible = "qcom,sc7180-smmu-500" }, + { .compatible = "qcom,sdm845-smmu-500" }, + { } +}; + static int arm_smmu_gr0_ns(int offset) { switch (offset) { @@ -168,8 +174,7 @@ struct arm_smmu_device *arm_smmu_impl_init(struct arm_smmu_device *smmu) if (of_property_read_bool(np, "calxeda,smmu-secure-config-access")) smmu->impl = &calxeda_impl; - if (of_device_is_compatible(np, "qcom,sdm845-smmu-500") || - of_device_is_compatible(np, "qcom,sc7180-smmu-500")) + if (of_match_node(qcom_smmu_impl_of_match, np)) return qcom_smmu_impl_init(smmu); if (of_device_is_compatible(smmu->dev->of_node, "qcom,adreno-smmu")) -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCHv3 4/7] iommu: arm-smmu-impl: Remove unwanted extra blank lines
There are few places in arm-smmu-impl where there are extra blank lines, remove them and while at it fix the checkpatch warning for space required before the open parenthesis. Signed-off-by: Sai Prakash Ranjan --- drivers/iommu/arm-smmu-impl.c | 6 +- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/iommu/arm-smmu-impl.c b/drivers/iommu/arm-smmu-impl.c index 309675cf6699..8fbab9c38b61 100644 --- a/drivers/iommu/arm-smmu-impl.c +++ b/drivers/iommu/arm-smmu-impl.c @@ -9,10 +9,9 @@ #include "arm-smmu.h" - static int arm_smmu_gr0_ns(int offset) { - switch(offset) { + switch (offset) { case ARM_SMMU_GR0_sCR0: case ARM_SMMU_GR0_sACR: case ARM_SMMU_GR0_sGFSR: @@ -47,7 +46,6 @@ static const struct arm_smmu_impl calxeda_impl = { .write_reg = arm_smmu_write_ns, }; - struct cavium_smmu { struct arm_smmu_device smmu; u32 id_base; @@ -103,7 +101,6 @@ static struct arm_smmu_device *cavium_smmu_impl_init(struct arm_smmu_device *smm return &cs->smmu; } - #define ARM_MMU500_ACTLR_CPRE (1 << 1) #define ARM_MMU500_ACR_CACHE_LOCK (1 << 26) @@ -148,7 +145,6 @@ static const struct arm_smmu_impl arm_mmu500_impl = { .reset = arm_mmu500_reset, }; - struct arm_smmu_device *arm_smmu_impl_init(struct arm_smmu_device *smmu) { const struct device_node *np = smmu->dev->of_node; -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCHv3 6/7] drm/msm: rearrange the gpu_rmw() function
From: Sharat Masetty The register read-modify-write construct is generic enough that it can be used by other subsystems as needed, create a more generic rmw() function and have the gpu_rmw() use this new function. Signed-off-by: Sharat Masetty Reviewed-by: Jordan Crouse Signed-off-by: Sai Prakash Ranjan --- drivers/gpu/drm/msm/msm_drv.c | 8 drivers/gpu/drm/msm/msm_drv.h | 1 + drivers/gpu/drm/msm/msm_gpu.h | 5 + 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 0c219b954943..5aa070929220 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -166,6 +166,14 @@ u32 msm_readl(const void __iomem *addr) return val; } +void msm_rmw(void __iomem *addr, u32 mask, u32 or) +{ + u32 val = msm_readl(addr); + + val &= ~mask; + msm_writel(val | or, addr); +} + struct msm_vblank_work { struct work_struct work; int crtc_id; diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index 983a8b7e5a74..5bb02ccb863a 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -417,6 +417,7 @@ void __iomem *msm_ioremap(struct platform_device *pdev, const char *name, const char *dbgname); void msm_writel(u32 data, void __iomem *addr); u32 msm_readl(const void __iomem *addr); +void msm_rmw(void __iomem *addr, u32 mask, u32 or); struct msm_gpu_submitqueue; int msm_submitqueue_init(struct drm_device *drm, struct msm_file_private *ctx); diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index f1762b77bea8..3519777c8cb2 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -232,10 +232,7 @@ static inline u32 gpu_read(struct msm_gpu *gpu, u32 reg) static inline void gpu_rmw(struct msm_gpu *gpu, u32 reg, u32 mask, u32 or) { - uint32_t val = gpu_read(gpu, reg); - - val &= ~mask; - gpu_write(gpu, reg, val | or); + msm_rmw(gpu->mmio + (reg << 2), mask, or); } static inline u64 gpu_read64(struct msm_gpu 
*gpu, u32 lo, u32 hi) -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCHv3 2/7] iommu/io-pgtable-arm: Add support to use system cache
Add a quirk IO_PGTABLE_QUIRK_SYS_CACHE to override the attributes set in TCR for the page table walker when using system cache. Signed-off-by: Sai Prakash Ranjan --- drivers/iommu/io-pgtable-arm.c | 7 ++- include/linux/io-pgtable.h | 4 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 04fbd4bf0ff9..0a6cb82fd98a 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -792,7 +792,8 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NON_STRICT | - IO_PGTABLE_QUIRK_ARM_TTBR1)) + IO_PGTABLE_QUIRK_ARM_TTBR1 | + IO_PGTABLE_QUIRK_SYS_CACHE)) return NULL; data = arm_lpae_alloc_pgtable(cfg); @@ -804,6 +805,10 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) tcr->sh = ARM_LPAE_TCR_SH_IS; tcr->irgn = ARM_LPAE_TCR_RGN_WBWA; tcr->orgn = ARM_LPAE_TCR_RGN_WBWA; + } else if (cfg->quirks & IO_PGTABLE_QUIRK_SYS_CACHE) { + tcr->sh = ARM_LPAE_TCR_SH_OS; + tcr->irgn = ARM_LPAE_TCR_RGN_NC; + tcr->orgn = ARM_LPAE_TCR_RGN_WBWA; } else { tcr->sh = ARM_LPAE_TCR_SH_OS; tcr->irgn = ARM_LPAE_TCR_RGN_NC; diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index bbed1d3925ba..23114b3fe2a5 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -86,6 +86,9 @@ struct io_pgtable_cfg { * * IO_PGTABLE_QUIRK_ARM_TTBR1: (ARM LPAE format) Configure the table * for use in the upper half of a split address space. +* +* IO_PGTABLE_QUIRK_SYS_CACHE: Override the attributes set in TCR for +* the page table walker when using system cache. 
*/ #define IO_PGTABLE_QUIRK_ARM_NS BIT(0) #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1) @@ -93,6 +96,7 @@ struct io_pgtable_cfg { #define IO_PGTABLE_QUIRK_ARM_MTK_EXTBIT(3) #define IO_PGTABLE_QUIRK_NON_STRICT BIT(4) #define IO_PGTABLE_QUIRK_ARM_TTBR1 BIT(5) + #define IO_PGTABLE_QUIRK_SYS_CACHE BIT(6) unsigned long quirks; unsigned long pgsize_bitmap; unsigned intias; -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCHv3 1/7] iommu/arm-smmu: Add a init_context_bank implementation hook
From: Jordan Crouse Add a new implementation hook to allow the implementation specific code to tweek the context bank configuration just before it gets written. The first user will be the Adreno GPU implementation to turn on SCTLR.HUPCF to ensure that a page fault doesn't terminating pending transactions. Doing so could hang the GPU if one of the terminated transactions is a CP read. This depends on the arm-smmu adreno SMMU implementation [1]. [1] https://lore.kernel.org/patchwork/patch/1264452/ Signed-off-by: Jordan Crouse Signed-off-by: Sai Prakash Ranjan --- drivers/iommu/arm-smmu-qcom.c | 13 + drivers/iommu/arm-smmu.c | 29 + drivers/iommu/arm-smmu.h | 12 3 files changed, 38 insertions(+), 16 deletions(-) diff --git a/drivers/iommu/arm-smmu-qcom.c b/drivers/iommu/arm-smmu-qcom.c index cb2acb6b19dd..6462fb00f493 100644 --- a/drivers/iommu/arm-smmu-qcom.c +++ b/drivers/iommu/arm-smmu-qcom.c @@ -17,6 +17,18 @@ static bool qcom_adreno_smmu_is_gpu_device(struct arm_smmu_domain *smmu_domain) return of_device_is_compatible(smmu_domain->dev->of_node, "qcom,adreno"); } +static void qcom_adreno_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, + struct arm_smmu_cb *cb) +{ + /* +* On the GPU device we want to process subsequent transactions after a +* fault to keep the GPU from hanging +*/ + + if (qcom_adreno_smmu_is_gpu_device(smmu_domain)) + cb->sctlr |= ARM_SMMU_SCTLR_HUPCF; +} + static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain, struct io_pgtable_cfg *pgtbl_cfg) { @@ -92,6 +104,7 @@ static const struct arm_smmu_impl qcom_adreno_smmu_impl = { .init_context = qcom_adreno_smmu_init_context, .def_domain_type = qcom_smmu_def_domain_type, .reset = qcom_smmu500_reset, + .init_context_bank = qcom_adreno_smmu_init_context_bank, }; diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 4bd247dfd703..b2564f93d685 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -86,14 +86,6 @@ struct arm_smmu_smr { 
boolvalid; }; -struct arm_smmu_cb { - u64 ttbr[2]; - u32 tcr[2]; - u32 mair[2]; - struct arm_smmu_cfg *cfg; - atomic_taux; -}; - struct arm_smmu_master_cfg { struct arm_smmu_device *smmu; s16 smendx[]; @@ -580,6 +572,18 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair >> 32; } } + + cb->sctlr = ARM_SMMU_SCTLR_CFIE | ARM_SMMU_SCTLR_CFRE | ARM_SMMU_SCTLR_AFE | + ARM_SMMU_SCTLR_TRE | ARM_SMMU_SCTLR_M; + + if (stage1) + cb->sctlr |= ARM_SMMU_SCTLR_S1_ASIDPNE; + if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) + cb->sctlr |= ARM_SMMU_SCTLR_E; + + /* Give the implementation a chance to adjust the configuration */ + if (smmu_domain->smmu->impl && smmu_domain->smmu->impl->init_context_bank) + smmu_domain->smmu->impl->init_context_bank(smmu_domain, cb); } static void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx) @@ -658,14 +662,7 @@ static void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx) } /* SCTLR */ - reg = ARM_SMMU_SCTLR_CFIE | ARM_SMMU_SCTLR_CFRE | ARM_SMMU_SCTLR_AFE | - ARM_SMMU_SCTLR_TRE | ARM_SMMU_SCTLR_M; - if (stage1) - reg |= ARM_SMMU_SCTLR_S1_ASIDPNE; - if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) - reg |= ARM_SMMU_SCTLR_E; - - arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, reg); + arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, cb->sctlr); } /* diff --git a/drivers/iommu/arm-smmu.h b/drivers/iommu/arm-smmu.h index 79d441024043..4a335ef3d97a 100644 --- a/drivers/iommu/arm-smmu.h +++ b/drivers/iommu/arm-smmu.h @@ -142,6 +142,7 @@ enum arm_smmu_cbar_type { #define ARM_SMMU_CB_SCTLR 0x0 #define ARM_SMMU_SCTLR_S1_ASIDPNE BIT(12) +#define ARM_SMMU_SCTLR_HUPCF BIT(8) #define ARM_SMMU_SCTLR_CFCFG BIT(7) #define ARM_SMMU_SCTLR_CFIEBIT(6) #define ARM_SMMU_SCTLR_CFREBIT(5) @@ -349,6 +350,15 @@ struct arm_smmu_domain { boolaux; }; +struct arm_smmu_cb { + u64 ttbr[2]; + u32 tcr[2]; + u32 mair[2]; + u32 sctlr; + struct arm_smmu_cfg *cfg; + atomic_taux; +}; + static inline 
u32 arm_smmu_lpae_tcr(struct io_pgtable_cfg *cfg) { u32 tcr = FIELD_PR
[PATCHv3 0/7] System Cache support for GPU and required SMMU support
Some hardware variants contain a system cache or the last level cache (LLC). This cache is typically a large block which is shared by multiple clients on the SOC. GPU uses the system cache to cache both the GPU data buffers (like textures) as well as the SMMU pagetables. This helps with improved render performance as well as lower power consumption by reducing the bus traffic to the system memory. The system cache architecture allows the cache to be split into slices which can then be used by multiple SOC clients. This patch series is an effort to enable and use two of those slices preallocated for the GPU, one for the GPU data buffers and another for the GPU SMMU hardware pagetables. Patch 1 adds an init_context_bank implementation hook to set SCTLR.HUPCF. Patches 2,3,6,7 add system cache support in SMMU and GPU driver. Patches 4 and 5 are minor cleanups for arm-smmu impl. Changes in v3: * Fix domain attribute setting to before iommu_attach_device() * Fix few code style and checkpatch warnings * Rebase on top of Jordan's latest split pagetables and per-instance pagetables support [1][2] Changes in v2: * Addressed review comments and rebased on top of Jordan's split pagetables series [1] https://lore.kernel.org/patchwork/cover/1264446/ [2] https://lore.kernel.org/patchwork/cover/1264460/ Jordan Crouse (1): iommu/arm-smmu: Add a init_context_bank implementation hook Sai Prakash Ranjan (4): iommu/io-pgtable-arm: Add support to use system cache iommu/arm-smmu: Add domain attribute for system cache iommu: arm-smmu-impl: Remove unwanted extra blank lines iommu: arm-smmu-impl: Convert to use of_match_node() for qcom impl Sharat Masetty (2): drm/msm: rearrange the gpu_rmw() function drm/msm/a6xx: Add support for using system cache(LLC) drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 82 + drivers/gpu/drm/msm/adreno/a6xx_gpu.h | 3 + drivers/gpu/drm/msm/adreno/adreno_gpu.c | 23 ++- drivers/gpu/drm/msm/msm_drv.c | 8 +++ drivers/gpu/drm/msm/msm_drv.h | 1 + drivers/gpu/drm/msm/msm_gpu.h | 5 +-
drivers/gpu/drm/msm/msm_iommu.c | 3 + drivers/gpu/drm/msm/msm_mmu.h | 4 ++ drivers/iommu/arm-smmu-impl.c | 13 ++-- drivers/iommu/arm-smmu-qcom.c | 13 drivers/iommu/arm-smmu.c| 46 +- drivers/iommu/arm-smmu.h| 13 drivers/iommu/io-pgtable-arm.c | 7 ++- include/linux/io-pgtable.h | 4 ++ include/linux/iommu.h | 1 + 15 files changed, 198 insertions(+), 28 deletions(-) -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH net] xsk: remove cheap_dma optimization
On 2020-06-28 18:16, Björn Töpel wrote: On 2020-06-27 09:04, Christoph Hellwig wrote: On Sat, Jun 27, 2020 at 01:00:19AM +0200, Daniel Borkmann wrote: Given there is roughly a ~5 weeks window at max where this removal could still be applied in the worst case, could we come up with a fix / proposal first that moves this into the DMA mapping core? If there is something that can be agreed upon by all parties, then we could avoid re-adding the 9% slowdown. :/ I'd rather turn it upside down - this abuse of the internals blocks work that has basically just missed the previous window and I'm not going to wait weeks to sort out the API misuse. But we can add optimizations back later if we find a sane way. I'm not super excited about the performance loss, but I do get Christoph's frustration about gutting the DMA API making it harder for DMA people to get work done. Lets try to solve this properly using proper DMA APIs. That being said I really can't see how this would make so much of a difference. What architecture and what dma_ops are you using for those measurements? What is the workload? The 9% is for an AF_XDP (Fast raw Ethernet socket. Think AF_PACKET, but faster.) benchmark: receive the packet from the NIC, and drop it. The DMA syncs stand out in the perf top: 28.63% [kernel] [k] i40e_clean_rx_irq_zc 17.12% [kernel] [k] xp_alloc 8.80% [kernel] [k] __xsk_rcv_zc 7.69% [kernel] [k] xdp_do_redirect 5.35% bpf_prog_992d9ddc835e5629 [k] bpf_prog_992d9ddc835e5629 4.77% [kernel] [k] xsk_rcv.part.0 4.07% [kernel] [k] __xsk_map_redirect 3.80% [kernel] [k] dma_direct_sync_single_for_cpu 3.03% [kernel] [k] dma_direct_sync_single_for_device 2.76% [kernel] [k] i40e_alloc_rx_buffers_zc 1.83% [kernel] [k] xsk_flush ... For this benchmark the dma_ops are NULL (dma_is_direct() == true), and the main issue is that SWIOTLB is now unconditionally enabled [1] for x86, and for each sync we have to check that if is_swiotlb_buffer() which involves a some costly indirection. 
That was pretty much what my hack avoided. Instead we did all the checks upfront, since AF_XDP has long-term DMA mappings, and just set a flag for that. Avoiding the whole "is this address swiotlb" in dma_direct_sync_single_for_{cpu, device]() per-packet would help a lot. I'm pretty sure that's one of the things we hope to achieve with the generic bypass flag :) Somewhat related to the DMA API; It would have performance benefits for AF_XDP if the DMA range of the mapped memory was linear, i.e. by IOMMU utilization. I've started hacking a thing a little bit, but it would be nice if such API was part of the mapping core. Input: array of pages Output: array of dma addrs (and obviously dev, flags and such) For non-IOMMU len(array of pages) == len(array of dma addrs) For best-case IOMMU len(array of dma addrs) == 1 (large linear space) But that's for later. :-) FWIW you will typically get that behaviour from IOMMU-based implementations of dma_map_sg() right now, although it's not strictly guaranteed. If you can weather some additional setup cost of calling sg_alloc_table_from_pages() plus walking the list after mapping to test whether you did get a contiguous result, you could start taking advantage of it as some of the dma-buf code in DRM and v4l2 does already (although those cases actually treat it as a strict dependency rather than an optimisation). I'm inclined to agree that if we're going to see more of these cases, a new API call that did formally guarantee a DMA-contiguous mapping (either via IOMMU or bounce buffering) or failure might indeed be handy. Robin. ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH net] xsk: remove cheap_dma optimization
On 6/29/20 5:10 PM, Björn Töpel wrote: On 2020-06-29 15:52, Daniel Borkmann wrote: Ok, fair enough, please work with DMA folks to get this properly integrated and restored then. Applied, thanks! Daniel, you were too quick! Please revert this one; Christoph just submitted a 4-patch series that addresses both the DMA API and the perf regression! Nice, tossed from bpf tree then! (Looks like it didn't land on the bpf list yet, but seems other mails are currently stuck as well on vger. I presume it will be routed to Linus via Christoph?) Thanks, Daniel ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH net] xsk: remove cheap_dma optimization
On 2020-06-29 15:52, Daniel Borkmann wrote: Ok, fair enough, please work with DMA folks to get this properly integrated and restored then. Applied, thanks! Daniel, you were too quick! Please revert this one; Christoph just submitted a 4-patch series that addresses both the DMA API and the perf regression! Björn ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [Freedreno] [PATCH v2 6/6] drm/msm/a6xx: Add support for per-instance pagetables
On Sat, Jun 27, 2020 at 01:11:14PM -0700, Rob Clark wrote: > On Sat, Jun 27, 2020 at 12:56 PM Rob Clark wrote: > > > > On Fri, Jun 26, 2020 at 1:04 PM Jordan Crouse > > wrote: > > > > > > Add support for using per-instance pagetables if all the dependencies are > > > available. > > > > > > Signed-off-by: Jordan Crouse > > > --- > > > > > > drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 43 +++ > > > drivers/gpu/drm/msm/msm_ringbuffer.h | 1 + > > > 2 files changed, 44 insertions(+) > > > > > > diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > > > b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > > > index aa53f47b7e8b..95ed2ceac121 100644 > > > --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > > > +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > > > @@ -79,6 +79,34 @@ static void get_stats_counter(struct msm_ringbuffer > > > *ring, u32 counter, > > > OUT_RING(ring, upper_32_bits(iova)); > > > } > > > > > > +static void a6xx_set_pagetable(struct msm_gpu *gpu, struct > > > msm_ringbuffer *ring, > > > + struct msm_file_private *ctx) > > > +{ > > > + phys_addr_t ttbr; > > > + u32 asid; > > > + > > > + if (msm_iommu_pagetable_params(ctx->aspace->mmu, &ttbr, &asid)) > > > + return; > > > + > > > + /* Execute the table update */ > > > + OUT_PKT7(ring, CP_SMMU_TABLE_UPDATE, 4); > > > + OUT_RING(ring, lower_32_bits(ttbr)); > > > + OUT_RING(ring, (((u64) asid) << 48) | upper_32_bits(ttbr)); > > > + /* CONTEXTIDR is currently unused */ > > > + OUT_RING(ring, 0); > > > + /* CONTEXTBANK is currently unused */ > > > + OUT_RING(ring, 0); > > > + > > > + /* > > > +* Write the new TTBR0 to the memstore. This is good for > > > debugging. 
> > > +*/ > > > + OUT_PKT7(ring, CP_MEM_WRITE, 4); > > > + OUT_RING(ring, lower_32_bits(rbmemptr(ring, ttbr0))); > > > + OUT_RING(ring, upper_32_bits(rbmemptr(ring, ttbr0))); > > > + OUT_RING(ring, lower_32_bits(ttbr)); > > > + OUT_RING(ring, (((u64) asid) << 48) | upper_32_bits(ttbr)); > > > +} > > > + > > > static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit > > > *submit, > > > struct msm_file_private *ctx) > > > { > > > @@ -89,6 +117,8 @@ static void a6xx_submit(struct msm_gpu *gpu, struct > > > msm_gem_submit *submit, > > > struct msm_ringbuffer *ring = submit->ring; > > > unsigned int i; > > > > > > + a6xx_set_pagetable(gpu, ring, ctx); > > > + > > > get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP_0_LO, > > > rbmemptr_stats(ring, index, cpcycles_start)); > > > > > > @@ -872,6 +902,18 @@ static unsigned long a6xx_gpu_busy(struct msm_gpu > > > *gpu) > > > return (unsigned long)busy_time; > > > } > > > > > > +struct msm_gem_address_space *a6xx_address_space_instance(struct msm_gpu > > > *gpu) > > > +{ > > > + struct msm_mmu *mmu; > > > + > > > + mmu = msm_iommu_pagetable_create(gpu->aspace->mmu); > > > + if (IS_ERR(mmu)) > > > + return msm_gem_address_space_get(gpu->aspace); > > > + > > > + return msm_gem_address_space_create(mmu, > > > + "gpu", 0x1ULL, 0x1ULL); > > > +} > > > + > > > static const struct adreno_gpu_funcs funcs = { > > > .base = { > > > .get_param = adreno_get_param, > > > @@ -895,6 +937,7 @@ static const struct adreno_gpu_funcs funcs = { > > > .gpu_state_put = a6xx_gpu_state_put, > > > #endif > > > .create_address_space = adreno_iommu_create_address_space, > > > + .address_space_instance = a6xx_address_space_instance, > > > > Hmm, maybe instead of .address_space_instance, something like > > .create_context_address_space? > > > > Since like .create_address_space, it is creating an address space.. > > the difference is that it is a per context/process aspace.. > > This is a good suggestion. 
I'm always open to changing function names. > > > or maybe just .create_pgtable and return the 'struct msm_mmu' (which > is itself starting to become less of a great name).. > > The only other thing a6xx_address_space_instance() adds is knowing > where the split is between the kernel and user pgtables, and I suppose > that isn't a thing that would really be changing between gens? In theory the split is determined by the hardware but its been the same for all a5xx/a6xx targets. Jordan > BR, > -R > > > BR, > > -R > > > > > }, > > > .get_timestamp = a6xx_get_timestamp, > > > }; > > > diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.h > > > b/drivers/gpu/drm/msm/msm_ringbuffer.h > > > index 7764373d0ed2..0987d6bf848c 100644 > > > --- a/drivers/gpu/drm/msm/msm_ringbuffer.h > > > +++ b/drivers/gpu/drm/msm/msm_ringbuffer.h > > > @@ -31,6 +31,7 @@ struct msm_rbmemptrs { > > > volatile uint32_t fence; > > > > > > volatile struct msm_gpu_submit_stats > > > stats[MSM
Re: [Freedreno] [PATCH v9 6/7] drm/msm: Set the global virtual address range from the IOMMU domain
On Sat, Jun 27, 2020 at 10:10:14AM -0700, Rob Clark wrote: > On Fri, Jun 26, 2020 at 1:01 PM Jordan Crouse wrote: > > > > Use the aperture settings from the IOMMU domain to set up the virtual > > address range for the GPU. This allows us to transparently deal with > > IOMMU side features (like split pagetables). > > > > Signed-off-by: Jordan Crouse > > --- > > > > drivers/gpu/drm/msm/adreno/adreno_gpu.c | 13 +++-- > > drivers/gpu/drm/msm/msm_iommu.c | 7 +++ > > 2 files changed, 18 insertions(+), 2 deletions(-) > > > > diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c > > b/drivers/gpu/drm/msm/adreno/adreno_gpu.c > > index 5db06b590943..3e717c1ebb7f 100644 > > --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c > > +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c > > @@ -192,9 +192,18 @@ adreno_iommu_create_address_space(struct msm_gpu *gpu, > > struct iommu_domain *iommu = iommu_domain_alloc(&platform_bus_type); > > struct msm_mmu *mmu = msm_iommu_new(&pdev->dev, iommu); > > struct msm_gem_address_space *aspace; > > + u64 start, size; > > > > - aspace = msm_gem_address_space_create(mmu, "gpu", SZ_16M, > > - 0x - SZ_16M); > > + /* > > +* Use the aperture start or SZ_16M, whichever is greater. This will > > +* ensure that we align with the allocated pagetable range while > > still > > +* allowing room in the lower 32 bits for GMEM and whatnot > > +*/ > > + start = max_t(u64, SZ_16M, iommu->geometry.aperture_start); > > + size = iommu->geometry.aperture_end - start + 1; > > + > > + aspace = msm_gem_address_space_create(mmu, "gpu", > > + start & GENMASK(48, 0), size); > > hmm, I kinda think this isn't going to play well for the 32b gpus > (pre-a5xx).. possibly we should add address space size to 'struct > adreno_info'? I checked and qcom-iommu sets the aperture correctly so this should be okay for everybody. To be honest, I'm nots sure if we even need to mask the start to 49 bits. It seems that all of the iommu implementations do the right thing. 
Of course it would be worth a check if you have a 4xx handy. > Or I guess it is always going to be the same for all devices within a > generation? So it could just be passed in to adreno_gpu_init() We can do that easily if we are worried about it (see also: a2xx). I just figured this might save us a bit of code. > Hopefully that makes things smoother if we someday had more than 48bits.. We'll be at 49 bits for as far ahead as I can see. 49 bits has a special meaning in the SMMU so it is a natural fit for the GPU hardware. If we change in N generations we can just shift to a family specific function at that point. Jordan > BR, > -R > > > > > if (IS_ERR(aspace) && !IS_ERR(mmu)) > > mmu->funcs->destroy(mmu); > > diff --git a/drivers/gpu/drm/msm/msm_iommu.c > > b/drivers/gpu/drm/msm/msm_iommu.c > > index 3a381a9674c9..1b6635504069 100644 > > --- a/drivers/gpu/drm/msm/msm_iommu.c > > +++ b/drivers/gpu/drm/msm/msm_iommu.c > > @@ -36,6 +36,10 @@ static int msm_iommu_map(struct msm_mmu *mmu, uint64_t > > iova, > > struct msm_iommu *iommu = to_msm_iommu(mmu); > > size_t ret; > > > > + /* The arm-smmu driver expects the addresses to be sign extended */ > > + if (iova & BIT_ULL(48)) > > + iova |= GENMASK_ULL(63, 49); > > + > > ret = iommu_map_sg(iommu->domain, iova, sgt->sgl, sgt->nents, prot); > > WARN_ON(!ret); > > > > @@ -46,6 +50,9 @@ static int msm_iommu_unmap(struct msm_mmu *mmu, uint64_t > > iova, size_t len) > > { > > struct msm_iommu *iommu = to_msm_iommu(mmu); > > > > + if (iova & BIT_ULL(48)) > > + iova |= GENMASK_ULL(63, 49); > > + > > iommu_unmap(iommu->domain, iova, len); > > > > return 0; > > -- > > 2.17.1 > > > > ___ > > Freedreno mailing list > > freedr...@lists.freedesktop.org > > https://lists.freedesktop.org/mailman/listinfo/freedreno -- The Qualcomm Innovation Center, Inc. 
is a member of Code Aurora Forum, a Linux Foundation Collaborative Project ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: add an API to check if a streaming mapping needs sync calls
On Mon, Jun 29, 2020 at 03:39:01PM +0200, Björn Töpel wrote: > On 2020-06-29 15:03, Christoph Hellwig wrote: >> Hi all, >> >> this series lifts the somewhat hacky checks in the XSK code if a DMA >> streaming mapping needs dma_sync_single_for_{device,cpu} calls to the >> DMA API. >> > > Thanks a lot for working on, and fixing this, Christoph! > > I took the series for a spin, and there are (obviously) no performance > regressions. > > Would the patches go through the net/bpf trees or somewhere else? either the net tree or (my) dma-mapping tree would be fine with me. ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH net] xsk: remove cheap_dma optimization
On 6/28/20 7:16 PM, Björn Töpel wrote: On 2020-06-27 09:04, Christoph Hellwig wrote: On Sat, Jun 27, 2020 at 01:00:19AM +0200, Daniel Borkmann wrote: Given there is roughly a ~5 weeks window at max where this removal could still be applied in the worst case, could we come up with a fix / proposal first that moves this into the DMA mapping core? If there is something that can be agreed upon by all parties, then we could avoid re-adding the 9% slowdown. :/ I'd rather turn it upside down - this abuse of the internals blocks work that has basically just missed the previous window and I'm not going to wait weeks to sort out the API misuse. But we can add optimizations back later if we find a sane way. I'm not super excited about the performance loss, but I do get Christoph's frustration about gutting the DMA API making it harder for DMA people to get work done. Lets try to solve this properly using proper DMA APIs. Ok, fair enough, please work with DMA folks to get this properly integrated and restored then. Applied, thanks! ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: add an API to check if a streaming mapping needs sync calls
On 2020-06-29 15:03, Christoph Hellwig wrote: Hi all, this series lifts the somewhat hacky checks in the XSK code if a DMA streaming mapping needs dma_sync_single_for_{device,cpu} calls to the DMA API. Thanks a lot for working on, and fixing this, Christoph! I took the series for a spin, and there are (obviously) no performance regressions. Would the patches go through the net/bpf trees or somewhere else? For the series: Tested-by: Björn Töpel Acked-by: Björn Töpel Björn Diffstat: Documentation/core-api/dma-api.rst |8 + include/linux/dma-direct.h |1 include/linux/dma-mapping.h|5 +++ include/net/xsk_buff_pool.h|6 ++-- kernel/dma/direct.c|6 kernel/dma/mapping.c | 10 ++ net/xdp/xsk_buff_pool.c| 54 ++--- 7 files changed, 37 insertions(+), 53 deletions(-) ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH] iommu: SUN50I_IOMMU should depend on HAS_DMA
On Mon, Jun 29, 2020 at 02:11:46PM +0200, Geert Uytterhoeven wrote: > If NO_DMA=y (e.g. Sun-3 all{mod,yes}-config): > > drivers/iommu/dma-iommu.o: In function `iommu_dma_mmap': > dma-iommu.c:(.text+0x92e): undefined reference to `dma_pgprot' > > IOMMU_DMA must not be selected, unless HAS_DMA=y. > > Hence fix this by making SUN50I_IOMMU depend on HAS_DMA. > > Fixes: 4100b8c229b32835 ("iommu: Add Allwinner H6 IOMMU driver") > Signed-off-by: Geert Uytterhoeven Acked-by: Maxime Ripard Thanks! Maxime signature.asc Description: PGP signature ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: the XSK buffer pool needs to be reverted
On 2020-06-27 08:02, Christoph Hellwig wrote: On Fri, Jun 26, 2020 at 01:54:12PM -0700, Jonathan Lemon wrote: On Fri, Jun 26, 2020 at 09:47:25AM +0200, Christoph Hellwig wrote: Note that this is somewhat urgent, as various of the APIs that the code is abusing are slated to go away for Linux 5.9, so this addition comes at a really bad time. Could you elaborate on what is upcoming here? Moving all these calls out of line, and adding a bypass flag to avoid the indirect function call for IOMMUs in direct mapped mode. Also, on a semi-related note, are there limitations on how many pages can be left mapped by the iommu? Some of the page pool work involves leaving the pages mapped instead of constantly mapping/unmapping them. There are, but I think for all modern IOMMUs they are so big that they don't matter. Maintaines of the individual IOMMU drivers might know more. Right - I don't know too much about older and more esoteric stuff like POWER TCE, but for modern pagetable-based stuff like Intel VT-d, AMD-Vi, and Arm SMMU, the only "limits" are such that legitimate DMA API use should never get anywhere near them (you'd run out of RAM for actual buffers long beforehand). The most vaguely-realistic concern might be a pathological system topology where some old 32-bit PCI device doesn't have ACS isolation from your high-performance NIC such that they have to share an address space, where the NIC might happen to steal all the low addresses and prevent the soundcard or whatever from being able to map a usable buffer. With an IOMMU, you typically really *want* to keep a full working set's worth of pages mapped, since dma_map/unmap are expensive while dma_sync is somewhere between relatively cheap and free. With no IOMMU it makes no real difference from the DMA API perspective since map/unmap are effectively no more than the equivalent sync operations anyway (I'm assuming we're not talking about the kind of constrained hardware that might need SWIOTLB). 
On a heavily loaded box with iommu enabled, it seems that quite often there is contention on the iova_lock. Are there known issues in this area? I'll have to defer to the IOMMU maintainers, and for that you'll need to say what code you are using. Current mainlaine doesn't even have an iova_lock anywhere. Again I can't speak for non-mainstream stuff outside drivers/iommu, but it's been over 4 years now since merging the initial scalability work for the generic IOVA allocator there that focused on minimising lock contention, and it's had considerable evaluation and tweaking since. But if we can achieve the goal of efficiently recycling mapped buffers then we shouldn't need to go anywhere near IOVA allocation either way except when expanding the pool. Robin. ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 4/4] xsk: use dma_need_sync instead of reimplementing it
Use the dma_need_sync helper instead of (not always entirely correctly) poking into the dma-mapping internals. Signed-off-by: Christoph Hellwig --- net/xdp/xsk_buff_pool.c | 50 +++-- 1 file changed, 3 insertions(+), 47 deletions(-) diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 6733e2c59e4835..08b80669f64955 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -2,9 +2,6 @@ #include #include -#include -#include -#include #include "xsk_queue.h" @@ -124,48 +121,6 @@ static void xp_check_dma_contiguity(struct xsk_buff_pool *pool) } } -static bool __maybe_unused xp_check_swiotlb_dma(struct xsk_buff_pool *pool) -{ -#if defined(CONFIG_SWIOTLB) - phys_addr_t paddr; - u32 i; - - for (i = 0; i < pool->dma_pages_cnt; i++) { - paddr = dma_to_phys(pool->dev, pool->dma_pages[i]); - if (is_swiotlb_buffer(paddr)) - return false; - } -#endif - return true; -} - -static bool xp_check_cheap_dma(struct xsk_buff_pool *pool) -{ -#if defined(CONFIG_HAS_DMA) - const struct dma_map_ops *ops = get_dma_ops(pool->dev); - - if (ops) { - return !ops->sync_single_for_cpu && - !ops->sync_single_for_device; - } - - if (!dma_is_direct(ops)) - return false; - - if (!xp_check_swiotlb_dma(pool)) - return false; - - if (!dev_is_dma_coherent(pool->dev)) { -#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ - defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) ||\ - defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) - return false; -#endif - } -#endif - return true; -} - int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev, unsigned long attrs, struct page **pages, u32 nr_pages) { @@ -179,6 +134,7 @@ int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev, pool->dev = dev; pool->dma_pages_cnt = nr_pages; + pool->dma_need_sync = false; for (i = 0; i < pool->dma_pages_cnt; i++) { dma = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, @@ -187,13 +143,13 @@ int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev, xp_dma_unmap(pool, attrs); return -ENOMEM; } + if 
(dma_need_sync(dev, dma)) + pool->dma_need_sync = true; pool->dma_pages[i] = dma; } if (pool->unaligned) xp_check_dma_contiguity(pool); - - pool->dma_need_sync = !xp_check_cheap_dma(pool); return 0; } EXPORT_SYMBOL(xp_dma_map); -- 2.26.2 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 2/4] xsk: replace the cheap_dma flag with a dma_need_sync flag
Invert the polarity and better name the flag so that the use case is properly documented. Signed-off-by: Christoph Hellwig --- include/net/xsk_buff_pool.h | 6 +++--- net/xdp/xsk_buff_pool.c | 5 ++--- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index a4ff226505c99c..6842990e2712bd 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -40,7 +40,7 @@ struct xsk_buff_pool { u32 headroom; u32 chunk_size; u32 frame_len; - bool cheap_dma; + bool dma_need_sync; bool unaligned; void *addrs; struct device *dev; @@ -80,7 +80,7 @@ static inline dma_addr_t xp_get_frame_dma(struct xdp_buff_xsk *xskb) void xp_dma_sync_for_cpu_slow(struct xdp_buff_xsk *xskb); static inline void xp_dma_sync_for_cpu(struct xdp_buff_xsk *xskb) { - if (xskb->pool->cheap_dma) + if (!xskb->pool->dma_need_sync) return; xp_dma_sync_for_cpu_slow(xskb); @@ -91,7 +91,7 @@ void xp_dma_sync_for_device_slow(struct xsk_buff_pool *pool, dma_addr_t dma, static inline void xp_dma_sync_for_device(struct xsk_buff_pool *pool, dma_addr_t dma, size_t size) { - if (pool->cheap_dma) + if (!pool->dma_need_sync) return; xp_dma_sync_for_device_slow(pool, dma, size); diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 540ed75e44821c..9fe84c797a7060 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -55,7 +55,6 @@ struct xsk_buff_pool *xp_create(struct page **pages, u32 nr_pages, u32 chunks, pool->free_heads_cnt = chunks; pool->headroom = headroom; pool->chunk_size = chunk_size; - pool->cheap_dma = true; pool->unaligned = unaligned; pool->frame_len = chunk_size - headroom - XDP_PACKET_HEADROOM; INIT_LIST_HEAD(&pool->free_list); @@ -195,7 +194,7 @@ int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev, xp_check_dma_contiguity(pool); pool->dev = dev; - pool->cheap_dma = xp_check_cheap_dma(pool); + pool->dma_need_sync = !xp_check_cheap_dma(pool); return 0; } EXPORT_SYMBOL(xp_dma_map); @@ 
-280,7 +279,7 @@ struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool) xskb->xdp.data = xskb->xdp.data_hard_start + XDP_PACKET_HEADROOM; xskb->xdp.data_meta = xskb->xdp.data; - if (!pool->cheap_dma) { + if (pool->dma_need_sync) { dma_sync_single_range_for_device(pool->dev, xskb->dma, 0, pool->frame_len, DMA_BIDIRECTIONAL); -- 2.26.2 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 1/4] dma-mapping: add a new dma_need_sync API
Add a new API to check if calls to dma_sync_single_for_{device,cpu} are required for a given DMA streaming mapping. Signed-off-by: Christoph Hellwig --- Documentation/core-api/dma-api.rst | 8 include/linux/dma-direct.h | 1 + include/linux/dma-mapping.h| 5 + kernel/dma/direct.c| 6 ++ kernel/dma/mapping.c | 10 ++ 5 files changed, 30 insertions(+) diff --git a/Documentation/core-api/dma-api.rst b/Documentation/core-api/dma-api.rst index 2d8d2fed731720..f41620439ef349 100644 --- a/Documentation/core-api/dma-api.rst +++ b/Documentation/core-api/dma-api.rst @@ -204,6 +204,14 @@ Returns the maximum size of a mapping for the device. The size parameter of the mapping functions like dma_map_single(), dma_map_page() and others should not be larger than the returned value. +:: + + bool + dma_need_sync(struct device *dev, dma_addr_t dma_addr); + +Returns %true if dma_sync_single_for_{device,cpu} calls are required to +transfer memory ownership. Returns %false if those calls can be skipped. + :: unsigned long diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index cdfa400f89b3d3..5184735a0fe8eb 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -85,4 +85,5 @@ int dma_direct_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs); int dma_direct_supported(struct device *dev, u64 mask); +bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr); #endif /* _LINUX_DMA_DIRECT_H */ diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 78f677cf45ab69..a33ed3954ed465 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -461,6 +461,7 @@ int dma_set_mask(struct device *dev, u64 mask); int dma_set_coherent_mask(struct device *dev, u64 mask); u64 dma_get_required_mask(struct device *dev); size_t dma_max_mapping_size(struct device *dev); +bool dma_need_sync(struct device *dev, dma_addr_t dma_addr); unsigned long 
dma_get_merge_boundary(struct device *dev); #else /* CONFIG_HAS_DMA */ static inline dma_addr_t dma_map_page_attrs(struct device *dev, @@ -571,6 +572,10 @@ static inline size_t dma_max_mapping_size(struct device *dev) { return 0; } +static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr) +{ + return false; +} static inline unsigned long dma_get_merge_boundary(struct device *dev) { return 0; diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 93f578a8e613ba..95866b64758100 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -539,3 +539,9 @@ size_t dma_direct_max_mapping_size(struct device *dev) return swiotlb_max_mapping_size(dev); return SIZE_MAX; } + +bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr) +{ + return !dev_is_dma_coherent(dev) || + is_swiotlb_buffer(dma_to_phys(dev, dma_addr)); +} diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 98e3d873792ea4..a8c18c9a796fdc 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -397,6 +397,16 @@ size_t dma_max_mapping_size(struct device *dev) } EXPORT_SYMBOL_GPL(dma_max_mapping_size); +bool dma_need_sync(struct device *dev, dma_addr_t dma_addr) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + if (dma_is_direct(ops)) + return dma_direct_need_sync(dev, dma_addr); + return ops->sync_single_for_cpu || ops->sync_single_for_device; +} +EXPORT_SYMBOL_GPL(dma_need_sync); + unsigned long dma_get_merge_boundary(struct device *dev) { const struct dma_map_ops *ops = get_dma_ops(dev); -- 2.26.2 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
add an API to check if a streaming mapping needs sync calls
Hi all, this series lifts the somewhat hacky checks in the XSK code if a DMA streaming mapping needs dma_sync_single_for_{device,cpu} calls to the DMA API. Diffstat: Documentation/core-api/dma-api.rst |8 + include/linux/dma-direct.h |1 include/linux/dma-mapping.h|5 +++ include/net/xsk_buff_pool.h|6 ++-- kernel/dma/direct.c|6 kernel/dma/mapping.c | 10 ++ net/xdp/xsk_buff_pool.c| 54 ++--- 7 files changed, 37 insertions(+), 53 deletions(-) ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 3/4] xsk: remove a double pool->dev assignment in xp_dma_map
->dev is already assigned at the top of the function, remove the duplicate one at the end. Signed-off-by: Christoph Hellwig --- net/xdp/xsk_buff_pool.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 9fe84c797a7060..6733e2c59e4835 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -193,7 +193,6 @@ int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev, if (pool->unaligned) xp_check_dma_contiguity(pool); - pool->dev = dev; pool->dma_need_sync = !xp_check_cheap_dma(pool); return 0; } -- 2.26.2 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [Regression] "iommu/amd: Relax locking in dma_ops path" makes tg3 ethernet transmit queue timeout
> On May 18, 2020, at 23:32, Kai-Heng Feng wrote: > > > >> On May 18, 2020, at 22:05, Kai-Heng Feng wrote: >> >> >> >>> On May 18, 2020, at 21:32, Joerg Roedel wrote: >>> >>> On Mon, May 18, 2020 at 05:06:45PM +0800, Kai-Heng Feng wrote: Particularly, as soon as the spinlock is removed, the issue can be reproduced. Function domain_flush_complete() doesn't seem to affect the status. However, the .map_page callback was removed by be62dbf554c5 ("iommu/amd: Convert AMD iommu driver to the dma-iommu api"), so there's no easy revert for this issue. This is still reproducible as of today's mainline kernel, v5.7-rc6. >>> >>> Is there any error message from the IOMMU driver? >>> >> >> As of mainline kernel, there's no error message from IOMMU driver. >> There are some complains from v4.15-rc1: >> https://pastebin.ubuntu.com/p/qn4TXkFxsc/ > > Just tested v5.7-rc6, the issue disappears as soon as kernel boots with > "iommu=off". I am still seeing the issue on v5.8-rc3. The issue goes away as soon as "iommu=off" is added. Kai-Heng > > Kai-Heng > >> >> Kai-Heng > ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
RE: [PATCH v3 02/14] iommu: Report domain nesting info
Hi Stefan, > From: Stefan Hajnoczi > Sent: Monday, June 29, 2020 5:25 PM > > On Wed, Jun 24, 2020 at 01:55:15AM -0700, Liu Yi L wrote: > > +/* > > + * struct iommu_nesting_info - Information for nesting-capable IOMMU. > > + * user space should check it before using > > + * nesting capability. > > + * > > + * @size: size of the whole structure > > + * @format:PASID table entry format, the same definition with > > + * @format of struct iommu_gpasid_bind_data. > > + * @features: supported nesting features. > > + * @flags: currently reserved for future extension. > > + * @data: vendor specific cap info. > > + * > > + * +---++ > > + * | feature | Notes | > > + * > +===+=== > =+ > > + * | SYSWIDE_PASID | Kernel manages PASID in system wide, PASIDs used | > > + * | | in the system should be allocated by host kernel | > > + * +---++ > > + * | BIND_PGTBL| bind page tables to host PASID, the PASID could | > > + * | | either be a host PASID passed in bind request or | > > + * | | default PASIDs (e.g. default PASID of aux-domain) | > > + * +---++ > > + * | CACHE_INVLD | mandatory feature for nesting capable IOMMU | > > + * +---++ > > This feature description is vague about what CACHE_INVLD does and how to > use it. If I understand correctly, the presence of this feature means > that VFIO_IOMMU_NESTING_OP_CACHE_INVLD must be used? > > The same kind of clarification could be done for SYSWIDE_PASID and > BIND_PGTBL too. For SYSWIDE_PASID and BIND_PGTBL, yes, presence of the feature bit means must use. So the two are requirements to user space if it wants to setup nesting. While for CACHE_INVLD, it's kind of availability here. How about removing CACHE_INVLD as presence of BIND_PGTBL should indicates support of CACHE_INVLD? Regards, Yi Liu > Stefan ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH] iommu: SUN50I_IOMMU should depend on HAS_DMA
If NO_DMA=y (e.g. Sun-3 all{mod,yes}-config): drivers/iommu/dma-iommu.o: In function `iommu_dma_mmap': dma-iommu.c:(.text+0x92e): undefined reference to `dma_pgprot' IOMMU_DMA must not be selected, unless HAS_DMA=y. Hence fix this by making SUN50I_IOMMU depend on HAS_DMA. Fixes: 4100b8c229b32835 ("iommu: Add Allwinner H6 IOMMU driver") Signed-off-by: Geert Uytterhoeven --- drivers/iommu/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 6dc49ed8377a5c12..b0f308cb7f7c2fc2 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -305,6 +305,7 @@ config ROCKCHIP_IOMMU config SUN50I_IOMMU bool "Allwinner H6 IOMMU Support" + depends on HAS_DMA depends on ARCH_SUNXI || COMPILE_TEST select ARM_DMA_USE_IOMMU select IOMMU_API -- 2.17.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 1/2] iommu: Add iommu_group_get/set_domain()
On 2020-06-27 04:15, Lu Baolu wrote: The hardware assistant vfio mediated device is a use case of iommu aux-domain. The interactions between vfio/mdev and iommu during mdev creation and passthr are: - Create a group for mdev with iommu_group_alloc(); - Add the device to the group with group = iommu_group_alloc(); if (IS_ERR(group)) return PTR_ERR(group); ret = iommu_group_add_device(group, &mdev->dev); if (!ret) dev_info(&mdev->dev, "MDEV: group_id = %d\n", iommu_group_id(group)); - Allocate an aux-domain iommu_domain_alloc() - Attach the aux-domain to the physical device from which the mdev is created. iommu_aux_attach_device() In the whole process, an iommu group was allocated for the mdev and an iommu domain was attached to the group, but the group->domain leaves NULL. As the result, iommu_get_domain_for_dev() doesn't work anymore. This adds iommu_group_get/set_domain() so that group->domain could be managed whenever a domain is attached or detached through the aux-domain api's. Letting external callers poke around directly in the internals of iommu_group doesn't look right to me. If a regular device is attached to one or more aux domains for PASID use, iommu_get_domain_for_dev() is still going to return the primary domain, so why should it be expected to behave differently for mediated devices? AFAICS it's perfectly legitimate to have no primary domain if traffic-without-PASID is invalid. Robin. 
Fixes: 7bd50f0cd2fd5 ("vfio/type1: Add domain at(de)taching group helpers") Signed-off-by: Lu Baolu --- drivers/iommu/iommu.c | 28 include/linux/iommu.h | 14 ++ 2 files changed, 42 insertions(+) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index d43120eb1dc5..e2b665303d70 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -715,6 +715,34 @@ int iommu_group_set_name(struct iommu_group *group, const char *name) } EXPORT_SYMBOL_GPL(iommu_group_set_name); +/** + * iommu_group_get_domain - get domain of a group + * @group: the group + * + * This is called to get the domain of a group. + */ +struct iommu_domain *iommu_group_get_domain(struct iommu_group *group) +{ + return group->domain; +} +EXPORT_SYMBOL_GPL(iommu_group_get_domain); + +/** + * iommu_group_set_domain - set domain for a group + * @group: the group + * @domain: iommu domain + * + * This is called to set the domain for a group. In aux-domain case, a domain + * might attach or detach to an iommu group through the aux-domain apis, but + * the group->domain doesn't get a chance to be updated there. 
+ */ +void iommu_group_set_domain(struct iommu_group *group, + struct iommu_domain *domain) +{ + group->domain = domain; +} +EXPORT_SYMBOL_GPL(iommu_group_set_domain); + static int iommu_create_device_direct_mappings(struct iommu_group *group, struct device *dev) { diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 5f0b7859d2eb..ff88d548a870 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -496,6 +496,9 @@ extern void iommu_group_set_iommudata(struct iommu_group *group, void *iommu_data, void (*release)(void *iommu_data)); extern int iommu_group_set_name(struct iommu_group *group, const char *name); +extern struct iommu_domain *iommu_group_get_domain(struct iommu_group *group); +extern void iommu_group_set_domain(struct iommu_group *group, + struct iommu_domain *domain); extern int iommu_group_add_device(struct iommu_group *group, struct device *dev); extern void iommu_group_remove_device(struct device *dev); @@ -840,6 +843,17 @@ static inline int iommu_group_set_name(struct iommu_group *group, return -ENODEV; } +static inline +struct iommu_domain *iommu_group_get_domain(struct iommu_group *group) +{ + return NULL; +} + +static inline void iommu_group_set_domain(struct iommu_group *group, + struct iommu_domain *domain) +{ +} + static inline int iommu_group_add_device(struct iommu_group *group, struct device *dev) { ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 13/13] powerpc/dma: Remove dev->archdata.iommu_domain
Joerg Roedel writes: > From: Joerg Roedel > > There are no users left, so remove the pointer and save some memory. > > Signed-off-by: Joerg Roedel > --- > arch/powerpc/include/asm/device.h | 3 --- > 1 file changed, 3 deletions(-) It's a little hard to confirm there are no users left just with grep, but I think you've got them all, and the compiler should tell us if you've missed any. Acked-by: Michael Ellerman (powerpc) cheers > diff --git a/arch/powerpc/include/asm/device.h > b/arch/powerpc/include/asm/device.h > index 266542769e4b..1bc595213338 100644 > --- a/arch/powerpc/include/asm/device.h > +++ b/arch/powerpc/include/asm/device.h > @@ -34,9 +34,6 @@ struct dev_archdata { > struct iommu_table *iommu_table_base; > #endif > > -#ifdef CONFIG_IOMMU_API > - void*iommu_domain; > -#endif > #ifdef CONFIG_PPC64 > struct pci_dn *pci_data; > #endif > -- > 2.27.0 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v5 10/10] iommu/mediatek: Add mt6779 basic support
On 29/06/2020 09:13, Chao Hao wrote: > 1. Start from mt6779, INVLDT_SEL move to offset=0x2c, so we add >REG_MMU_INV_SEL_GEN2 definition and mt6779 uses it. > 2. Add mt6779_data to support mm_iommu HW init. > > Cc: Yong Wu > Cc: Matthias Brugger > Signed-off-by: Chao Hao Reviewed by: Matthias Brugger > --- > drivers/iommu/mtk_iommu.c | 11 +++ > drivers/iommu/mtk_iommu.h | 1 + > 2 files changed, 12 insertions(+) > > diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c > index e46e2deee3fd..1575196d9cd5 100644 > --- a/drivers/iommu/mtk_iommu.c > +++ b/drivers/iommu/mtk_iommu.c > @@ -37,6 +37,7 @@ > #define REG_MMU_INVLD_START_A0x024 > #define REG_MMU_INVLD_END_A 0x028 > > +#define REG_MMU_INV_SEL_GEN2 0x02c > #define REG_MMU_INV_SEL_GEN1 0x038 > #define F_INVLD_EN0 BIT(0) > #define F_INVLD_EN1 BIT(1) > @@ -798,6 +799,15 @@ static const struct mtk_iommu_plat_data mt2712_data = { > .larbid_remap = {{0}, {1}, {2}, {3}, {4}, {5}, {6}, {7}}, > }; > > +static const struct mtk_iommu_plat_data mt6779_data = { > + .m4u_plat = M4U_MT6779, > + .flags = HAS_SUB_COMM | > + OUT_ORDER_EN | > + WR_THROT_EN, > + .inv_sel_reg = REG_MMU_INV_SEL_GEN2, > + .larbid_remap = {{0}, {1}, {2}, {3}, {5}, {7, 8}, {10}, {9}}, > +}; > + > static const struct mtk_iommu_plat_data mt8173_data = { > .m4u_plat = M4U_MT8173, > .flags= HAS_4GB_MODE | > @@ -816,6 +826,7 @@ static const struct mtk_iommu_plat_data mt8183_data = { > > static const struct of_device_id mtk_iommu_of_ids[] = { > { .compatible = "mediatek,mt2712-m4u", .data = &mt2712_data}, > + { .compatible = "mediatek,mt6779-m4u", .data = &mt6779_data}, > { .compatible = "mediatek,mt8173-m4u", .data = &mt8173_data}, > { .compatible = "mediatek,mt8183-m4u", .data = &mt8183_data}, > {} > diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h > index ce4f4e8f03aa..a080db2e8a93 100644 > --- a/drivers/iommu/mtk_iommu.h > +++ b/drivers/iommu/mtk_iommu.h > @@ -43,6 +43,7 @@ struct mtk_iommu_suspend_reg { > enum mtk_iommu_plat 
{ > M4U_MT2701, > M4U_MT2712, > + M4U_MT6779, > M4U_MT8173, > M4U_MT8183, > }; > ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v5 09/10] iommu/mediatek: Modify MMU_CTRL register setting
On 29/06/2020 09:13, Chao Hao wrote: > MT8173 is different from other SoCs for MMU_CTRL register. > For mt8173, its bit9 is in_order_write_en and doesn't use its > default 1'b1.> For other SoCs, bit[12] represents victim_tlb_en feature and > victim_tlb is enabled by default(bit[12]=1), if we use > "regval = F_MMU_TF_PROT_TO_PROGRAM_ADDR", victim_tlb will be > disabled, it will drop iommu performance. > So we need to deal with the setting of MMU_CTRL separately > for mt8173 and others. > My proposal to rewrite the commit message: The MMU_CTRL register of MT8173 is different from other SoCs. The in_order_wr_en is bit[9] which is zero by default. Other SoCs have the victim_tlb_en feature mapped to bit[12]. This bit is set to one by default. We need to preserve the bit when setting F_MMU_TF_PROT_TO_PROGRAM_ADDR as otherwise the bit will be cleared and IOMMU performance will drop. > Suggested-by: Matthias Brugger > Suggested-by: Yong Wu > Signed-off-by: Chao Hao > --- > drivers/iommu/mtk_iommu.c | 3 ++- > 1 file changed, 2 insertions(+), 1 deletion(-) > > diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c > index 8299a3299090..e46e2deee3fd 100644 > --- a/drivers/iommu/mtk_iommu.c > +++ b/drivers/iommu/mtk_iommu.c > @@ -543,11 +543,12 @@ static int mtk_iommu_hw_init(const struct > mtk_iommu_data *data) > return ret; > } > > + regval = readl_relaxed(data->base + REG_MMU_CTRL_REG); The read is only needed in the else branch. > if (data->plat_data->m4u_plat == M4U_MT8173) > regval = F_MMU_PREFETCH_RT_REPLACE_MOD | >F_MMU_TF_PROT_TO_PROGRAM_ADDR_MT8173; > else > - regval = F_MMU_TF_PROT_TO_PROGRAM_ADDR; > + regval |= F_MMU_TF_PROT_TO_PROGRAM_ADDR; > writel_relaxed(regval, data->base + REG_MMU_CTRL_REG); > > regval = F_L2_MULIT_HIT_EN | > ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v5 07/10] iommu/mediatek: Add REG_MMU_WR_LEN register definition
On 29/06/2020 09:13, Chao Hao wrote: > Some platforms(ex: mt6779) need to improve performance by setting > REG_MMU_WR_LEN register. And we can use WR_THROT_EN macro to control > whether we need to set the register. If the register uses default value, > iommu will send command to EMI without restriction, when the number of > commands become more and more, it will drop the EMI performance. So when > more than ten_commands(default value) have not been handled by EMI, iommu will > stop sending commands to EMI to keep EMI's performance by enabling write > throttling mechanism(bit[5][21]=0) in MMU_WR_LEN_CTRL register. > > Cc: Matthias Brugger > Signed-off-by: Chao Hao > --- > drivers/iommu/mtk_iommu.c | 10 ++ > drivers/iommu/mtk_iommu.h | 2 ++ > 2 files changed, 12 insertions(+) > > diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c > index ec1f86913739..92316c4175a9 100644 > --- a/drivers/iommu/mtk_iommu.c > +++ b/drivers/iommu/mtk_iommu.c > @@ -46,6 +46,8 @@ > #define F_MMU_STANDARD_AXI_MODE_BIT (BIT(3) | BIT(19)) > > #define REG_MMU_DCM_DIS 0x050 > +#define REG_MMU_WR_LEN 0x054 The register name is confusing. For me it seems to describe the length of a write but it is used for controlling the write throttling. Is this the name that's used in the datasheet? > +#define F_MMU_WR_THROT_DIS_BIT (BIT(5) | BIT(21)) There are two spaces between '|' and 'BIT(21)', should be one. Regarding the name of the define, what does the 'F_' stands for? Also I think it should be called '_MASK' instead of '_BIT' as it defines a mask of bits.
Regards, Matthias > > #define REG_MMU_CTRL_REG 0x110 > #define F_MMU_TF_PROT_TO_PROGRAM_ADDR(2 << 4) > @@ -582,6 +584,12 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data > *data) > writel_relaxed(regval, data->base + REG_MMU_VLD_PA_RNG); > } > writel_relaxed(0, data->base + REG_MMU_DCM_DIS); > + if (MTK_IOMMU_HAS_FLAG(data->plat_data, WR_THROT_EN)) { > + /* write command throttling mode */ > + regval = readl_relaxed(data->base + REG_MMU_WR_LEN); > + regval &= ~F_MMU_WR_THROT_DIS_BIT; > + writel_relaxed(regval, data->base + REG_MMU_WR_LEN); > + } > > regval = readl_relaxed(data->base + REG_MMU_MISC_CTRL); > if (MTK_IOMMU_HAS_FLAG(data->plat_data, RESET_AXI)) { > @@ -737,6 +745,7 @@ static int __maybe_unused mtk_iommu_suspend(struct device > *dev) > struct mtk_iommu_suspend_reg *reg = &data->reg; > void __iomem *base = data->base; > > + reg->wr_len = readl_relaxed(base + REG_MMU_WR_LEN); > reg->misc_ctrl = readl_relaxed(base + REG_MMU_MISC_CTRL); > reg->dcm_dis = readl_relaxed(base + REG_MMU_DCM_DIS); > reg->ctrl_reg = readl_relaxed(base + REG_MMU_CTRL_REG); > @@ -761,6 +770,7 @@ static int __maybe_unused mtk_iommu_resume(struct device > *dev) > dev_err(data->dev, "Failed to enable clk(%d) in resume\n", ret); > return ret; > } > + writel_relaxed(reg->wr_len, base + REG_MMU_WR_LEN); > writel_relaxed(reg->misc_ctrl, base + REG_MMU_MISC_CTRL); > writel_relaxed(reg->dcm_dis, base + REG_MMU_DCM_DIS); > writel_relaxed(reg->ctrl_reg, base + REG_MMU_CTRL_REG); > diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h > index be6d32ee5bda..ce4f4e8f03aa 100644 > --- a/drivers/iommu/mtk_iommu.h > +++ b/drivers/iommu/mtk_iommu.h > @@ -24,6 +24,7 @@ > #define RESET_AXIBIT(3) > #define OUT_ORDER_EN BIT(4) > #define HAS_SUB_COMM BIT(5) > +#define WR_THROT_EN BIT(6) > > #define MTK_IOMMU_HAS_FLAG(pdata, _x) \ > pdata)->flags) & (_x)) == (_x)) > @@ -36,6 +37,7 @@ struct mtk_iommu_suspend_reg { > u32 int_main_control; > u32 ivrp_paddr; > u32 vld_pa_rng; > + u32 
wr_len; > }; > > enum mtk_iommu_plat { > ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v5 08/10] iommu/mediatek: Extend protect pa alignment value
On 29/06/2020 09:13, Chao Hao wrote: > Starting with mt6779, iommu needs to extend to 256 bytes from 128 > bytes which can send the max number of data for memory protection > pa alignment. So we can use a separate patch to modify it. > > Suggested-by: Matthias Brugger > Signed-off-by: Chao Hao Reviewed-by: Matthias Brugger > --- > drivers/iommu/mtk_iommu.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c > index 92316c4175a9..8299a3299090 100644 > --- a/drivers/iommu/mtk_iommu.c > +++ b/drivers/iommu/mtk_iommu.c > @@ -98,7 +98,7 @@ > #define F_MMU_INT_ID_LARB_ID(a) (((a) >> 7) & 0x7) > #define F_MMU_INT_ID_PORT_ID(a) (((a) >> 2) & 0x1f) > > -#define MTK_PROTECT_PA_ALIGN 128 > +#define MTK_PROTECT_PA_ALIGN 256 > > /* > * Get the local arbiter ID and the portid within the larb arbiter > ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v5 04/10] iommu/mediatek: Setting MISC_CTRL register
On 29/06/2020 09:13, Chao Hao wrote: > Add F_MMU_IN_ORDER_WR_EN and F_MMU_STANDARD_AXI_MODE_BIT definition > in MISC_CTRL register. > F_MMU_STANDARD_AXI_MODE_BIT: > If we set F_MMU_STANDARD_AXI_MODE_BIT(bit[3][19] = 0, not follow > standard AXI protocol), iommu will send urgent read command firstly > compare with normal read command to improve performance. Can you please help me to understand the phrase. Sorry I'm not a AXI specialist. Does this mean that you will send a 'urgent read command' which is not described in the specifications instead of a normal read command? > F_MMU_IN_ORDER_WR_EN: > If we set F_MMU_IN_ORDER_WR_EN(bit[1][17] = 0, out-of-order write), iommu > will re-order write command and send more higher priority write command > instead of sending write command in order. The feature be controlled > by OUT_ORDER_EN macro definition. > > Cc: Matthias Brugger > Suggested-by: Yong Wu > Signed-off-by: Chao Hao > --- > drivers/iommu/mtk_iommu.c | 12 +++- > drivers/iommu/mtk_iommu.h | 1 + > 2 files changed, 12 insertions(+), 1 deletion(-) > > diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c > index 8f81df6cbe51..67b46b5d83d9 100644 > --- a/drivers/iommu/mtk_iommu.c > +++ b/drivers/iommu/mtk_iommu.c > @@ -42,6 +42,9 @@ > #define F_INVLD_EN1 BIT(1) > > #define REG_MMU_MISC_CTRL0x048 > +#define F_MMU_IN_ORDER_WR_EN (BIT(1) | BIT(17)) > +#define F_MMU_STANDARD_AXI_MODE_BIT (BIT(3) | BIT(19)) Wouldn't it make more sense to name it F_MMU_STANDARD_AXI_MODE_EN? > + > #define REG_MMU_DCM_DIS 0x050 > > #define REG_MMU_CTRL_REG 0x110 > @@ -574,10 +577,17 @@ static int mtk_iommu_hw_init(const struct > mtk_iommu_data *data) > } > writel_relaxed(0, data->base + REG_MMU_DCM_DIS); > > + regval = readl_relaxed(data->base + REG_MMU_MISC_CTRL); We only need to read regval in the else branch. 
> if (MTK_IOMMU_HAS_FLAG(data->plat_data, RESET_AXI)) { > /* The register is called STANDARD_AXI_MODE in this case */ > - writel_relaxed(0, data->base + REG_MMU_MISC_CTRL); > + regval = 0; > + } else { > + /* For mm_iommu, it can improve performance by the setting */ > + regval &= ~F_MMU_STANDARD_AXI_MODE_BIT; > + if (MTK_IOMMU_HAS_FLAG(data->plat_data, OUT_ORDER_EN)) > + regval &= ~F_MMU_IN_ORDER_WR_EN; > } > + writel_relaxed(regval, data->base + REG_MMU_MISC_CTRL); > > if (devm_request_irq(data->dev, data->irq, mtk_iommu_isr, 0, >dev_name(data->dev), (void *)data)) { > diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h > index 7cc39f729263..4b780b651ef4 100644 > --- a/drivers/iommu/mtk_iommu.h > +++ b/drivers/iommu/mtk_iommu.h > @@ -22,6 +22,7 @@ > #define HAS_BCLK BIT(1) > #define HAS_VLD_PA_RNG BIT(2) > #define RESET_AXIBIT(3) > +#define OUT_ORDER_EN BIT(4) Maybe something like OUT_ORDER_WR_EN, to make clear that it's about the the write path. > > #define MTK_IOMMU_HAS_FLAG(pdata, _x) \ > pdata)->flags) & (_x)) == (_x)) > ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
RE: [PATCH v3 13/14] vfio: Document dual stage control
> From: Stefan Hajnoczi > Sent: Monday, June 29, 2020 5:22 PM > > On Wed, Jun 24, 2020 at 01:55:26AM -0700, Liu Yi L wrote: > > +Details can be found in Documentation/userspace-api/iommu.rst. For > > +Intel VT-d, each stage 1 page table is bound to host by: > > + > > +nesting_op->flags = VFIO_IOMMU_NESTING_OP_BIND_PGTBL; > > +memcpy(&nesting_op->data, &bind_data, sizeof(bind_data)); > > +ioctl(container->fd, VFIO_IOMMU_NESTING_OP, nesting_op); > > + > > +As mentioned above, guest OS may use stage 1 for GIOVA->GPA or GVA->GPA. > > +GVA->GPA page tables are available when PASID (Process Address Space > > +GVA->ID) > > +is exposed to guest. e.g. guest with PASID-capable devices assigned. > > +For such page table binding, the bind_data should include PASID info, > > +which is allocated by guest itself or by host. This depends on > > +hardware vendor e.g. Intel VT-d requires to allocate PASID from host. > > +This requirement is defined by the Virtual Command Support in VT-d > > +3.0 spec, guest software running on VT-d should allocate PASID from > > +host kernel. To allocate PASID from host, user space should +check > > +the IOMMU_NESTING_FEAT_SYSWIDE_PASID > > s/+check/check/g got it. > Reviewed-by: Stefan Hajnoczi thanks :-) Regards, Yi Liu ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v3 02/14] iommu: Report domain nesting info
On Wed, Jun 24, 2020 at 01:55:15AM -0700, Liu Yi L wrote: > +/* > + * struct iommu_nesting_info - Information for nesting-capable IOMMU. > + * user space should check it before using > + * nesting capability. > + * > + * @size:size of the whole structure > + * @format: PASID table entry format, the same definition with > + * @format of struct iommu_gpasid_bind_data. > + * @features:supported nesting features. > + * @flags: currently reserved for future extension. > + * @data:vendor specific cap info. > + * > + * +---++ > + * | feature | Notes | > + * +===++ > + * | SYSWIDE_PASID | Kernel manages PASID in system wide, PASIDs used | > + * | | in the system should be allocated by host kernel | > + * +---++ > + * | BIND_PGTBL| bind page tables to host PASID, the PASID could | > + * | | either be a host PASID passed in bind request or | > + * | | default PASIDs (e.g. default PASID of aux-domain) | > + * +---++ > + * | CACHE_INVLD | mandatory feature for nesting capable IOMMU | > + * +---++ This feature description is vague about what CACHE_INVLD does and how to use it. If I understand correctly, the presence of this feature means that VFIO_IOMMU_NESTING_OP_CACHE_INVLD must be used? The same kind of clarification could be done for SYSWIDE_PASID and BIND_PGTBL too. Stefan signature.asc Description: PGP signature ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v3 13/14] vfio: Document dual stage control
On Wed, Jun 24, 2020 at 01:55:26AM -0700, Liu Yi L wrote: > +Details can be found in Documentation/userspace-api/iommu.rst. For Intel > +VT-d, each stage 1 page table is bound to host by: > + > +nesting_op->flags = VFIO_IOMMU_NESTING_OP_BIND_PGTBL; > +memcpy(&nesting_op->data, &bind_data, sizeof(bind_data)); > +ioctl(container->fd, VFIO_IOMMU_NESTING_OP, nesting_op); > + > +As mentioned above, guest OS may use stage 1 for GIOVA->GPA or GVA->GPA. > +GVA->GPA page tables are available when PASID (Process Address Space ID) > +is exposed to guest. e.g. guest with PASID-capable devices assigned. For > +such page table binding, the bind_data should include PASID info, which > +is allocated by guest itself or by host. This depends on hardware vendor > +e.g. Intel VT-d requires to allocate PASID from host. This requirement is > +defined by the Virtual Command Support in VT-d 3.0 spec, guest software > +running on VT-d should allocate PASID from host kernel. To allocate PASID > +from host, user space should +check the IOMMU_NESTING_FEAT_SYSWIDE_PASID s/+check/check/g Reviewed-by: Stefan Hajnoczi signature.asc Description: PGP signature ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v5 03/10] iommu/mediatek: Modify the usage of mtk_iommu_plat_data structure
On 29/06/2020 09:13, Chao Hao wrote: > Given the fact that we are adding more and more plat_data bool values, > it would make sense to use a u32 flags register and add the appropriate > macro definitions to set and check for a flag present. > No functional change. > > Suggested-by: Matthias Brugger > Signed-off-by: Chao Hao Reviewed-by: Matthias Brugger > --- > drivers/iommu/mtk_iommu.c | 23 --- > drivers/iommu/mtk_iommu.h | 16 ++-- > 2 files changed, 22 insertions(+), 17 deletions(-) > > diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c > index 88d3df5b91c2..8f81df6cbe51 100644 > --- a/drivers/iommu/mtk_iommu.c > +++ b/drivers/iommu/mtk_iommu.c > @@ -563,7 +563,8 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data > *data) >upper_32_bits(data->protect_base); > writel_relaxed(regval, data->base + REG_MMU_IVRP_PADDR); > > - if (data->enable_4GB && data->plat_data->has_vld_pa_rng) { > + if (data->enable_4GB && > + MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_VLD_PA_RNG)) { > /* >* If 4GB mode is enabled, the validate PA range is from >* 0x1__ to 0x1__. here record bit[32:30]. 
> @@ -573,7 +574,7 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data > *data) > } > writel_relaxed(0, data->base + REG_MMU_DCM_DIS); > > - if (data->plat_data->reset_axi) { > + if (MTK_IOMMU_HAS_FLAG(data->plat_data, RESET_AXI)) { > /* The register is called STANDARD_AXI_MODE in this case */ > writel_relaxed(0, data->base + REG_MMU_MISC_CTRL); > } > @@ -618,7 +619,7 @@ static int mtk_iommu_probe(struct platform_device *pdev) > > /* Whether the current dram is over 4GB */ > data->enable_4GB = !!(max_pfn > (BIT_ULL(32) >> PAGE_SHIFT)); > - if (!data->plat_data->has_4gb_mode) > + if (!MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_4GB_MODE)) > data->enable_4GB = false; > > res = platform_get_resource(pdev, IORESOURCE_MEM, 0); > @@ -631,7 +632,7 @@ static int mtk_iommu_probe(struct platform_device *pdev) > if (data->irq < 0) > return data->irq; > > - if (data->plat_data->has_bclk) { > + if (MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_BCLK)) { > data->bclk = devm_clk_get(dev, "bclk"); > if (IS_ERR(data->bclk)) > return PTR_ERR(data->bclk); > @@ -763,23 +764,23 @@ static const struct dev_pm_ops mtk_iommu_pm_ops = { > > static const struct mtk_iommu_plat_data mt2712_data = { > .m4u_plat = M4U_MT2712, > - .has_4gb_mode = true, > - .has_bclk = true, > - .has_vld_pa_rng = true, > + .flags= HAS_4GB_MODE | > + HAS_BCLK | > + HAS_VLD_PA_RNG, > .larbid_remap = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, > }; > > static const struct mtk_iommu_plat_data mt8173_data = { > .m4u_plat = M4U_MT8173, > - .has_4gb_mode = true, > - .has_bclk = true, > - .reset_axi= true, > + .flags= HAS_4GB_MODE | > + HAS_BCLK | > + RESET_AXI, > .larbid_remap = {0, 1, 2, 3, 4, 5}, /* Linear mapping. 
*/ > }; > > static const struct mtk_iommu_plat_data mt8183_data = { > .m4u_plat = M4U_MT8183, > - .reset_axi= true, > + .flags= RESET_AXI, > .larbid_remap = {0, 4, 5, 6, 7, 2, 3, 1}, > }; > > diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h > index 1b6ea839b92c..7cc39f729263 100644 > --- a/drivers/iommu/mtk_iommu.h > +++ b/drivers/iommu/mtk_iommu.h > @@ -17,6 +17,15 @@ > #include > #include > > +#define HAS_4GB_MODE BIT(0) > +/* HW will use the EMI clock if there isn't the "bclk". */ > +#define HAS_BCLK BIT(1) > +#define HAS_VLD_PA_RNG BIT(2) > +#define RESET_AXIBIT(3) > + > +#define MTK_IOMMU_HAS_FLAG(pdata, _x) \ > + pdata)->flags) & (_x)) == (_x)) > + > struct mtk_iommu_suspend_reg { > u32 misc_ctrl; > u32 dcm_dis; > @@ -36,12 +45,7 @@ enum mtk_iommu_plat { > > struct mtk_iommu_plat_data { > enum mtk_iommu_plat m4u_plat; > - boolhas_4gb_mode; > - > - /* HW will use the EMI clock if there isn't the "bclk". */ > - boolhas_bclk; > - boolhas_vld_pa_rng; > - boolreset_axi; > + u32 flags; > unsigned char larbid_remap[MTK_LARB_NR_MAX]; > }; > > ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v2 01/12] ACPI/IORT: Make iort_match_node_callback walk the ACPI namespace for NC
On Mon, Jun 29, 2020 at 12:24:43PM +0800, Hanjun Guo wrote: > Hi Lorenzo, > > On 2020/6/19 16:20, Lorenzo Pieralisi wrote: > > When the iort_match_node_callback is invoked for a named component > > the match should be executed upon a device with an ACPI companion. > > > > For devices with no ACPI companion set-up the ACPI device tree must be > > walked in order to find the first parent node with a companion set and > > check the parent node against the named component entry to check whether > > there is a match and therefore an IORT node describing the in/out ID > > translation for the device has been found. > > > > Signed-off-by: Lorenzo Pieralisi > > Cc: Will Deacon > > Cc: Hanjun Guo > > Cc: Sudeep Holla > > Cc: Catalin Marinas > > Cc: Robin Murphy > > Cc: "Rafael J. Wysocki" > > --- > > drivers/acpi/arm64/iort.c | 20 ++-- > > 1 file changed, 18 insertions(+), 2 deletions(-) > > > > diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c > > index 28a6b387e80e..5eee81758184 100644 > > --- a/drivers/acpi/arm64/iort.c > > +++ b/drivers/acpi/arm64/iort.c > > @@ -264,15 +264,31 @@ static acpi_status iort_match_node_callback(struct > > acpi_iort_node *node, > > if (node->type == ACPI_IORT_NODE_NAMED_COMPONENT) { > > struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL }; > > - struct acpi_device *adev = to_acpi_device_node(dev->fwnode); > > + struct acpi_device *adev; > > struct acpi_iort_named_component *ncomp; > > + struct device *nc_dev = dev; > > + > > + /* > > +* Walk the device tree to find a device with an > > +* ACPI companion; there is no point in scanning > > +* IORT for a device matching a named component if > > +* the device does not have an ACPI companion to > > +* start with. 
> > +*/ > > + do { > > + adev = ACPI_COMPANION(nc_dev); > > + if (adev) > > + break; > > + > > + nc_dev = nc_dev->parent; > > + } while (nc_dev); > > I'm lost here, we need the ACPI_COMPANION (the same as > to_acpi_device_node()) of the device, but why do we need to go > up to find the parent node? For devices that aren't described in the DSDT - IORT translations are determined by their ACPI parent device. Do you see/Have you found any issue with this approach ? > For a platform device, if I use its parent's full path name for > its named component entry, then it will match, but this will violate > the IORT spec. Can you elaborate on this please I don't get the point you are making. Thanks, Lorenzo ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v2] dma-pool: Fix too large DMA pools on medium systems
On Sat, Jun 27, 2020 at 06:13:43PM +0200, Marion & Christophe JAILLET wrote: > I'm sorry, but I will not send pci_ --> dma_ conversion for this driver. > I'm a bit puzzled by some choices of GFP_KERNEL and GFP_ATOMIC that are not all > that obvious to me. > > I'll try to send some patches for other easier drivers in the coming weeks. No problem, I sent a patch for the conversion of the allocations that caused problems. ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [patch] dma-pool: warn when coherent pool is depleted
On Sat, Jun 27, 2020 at 09:25:21PM -0700, David Rientjes wrote: > Thanks Guenter. Christoph, does it make sense to apply this patch since > there may not be an artifact left behind in the kernel log on allocation > failure by the caller? Sorry, this fell through the cracks. I've added it to the dma-mapping tree now. ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 01/13] iommu/exynos: Use dev_iommu_priv_get/set()
On 25.06.2020 15:08, Joerg Roedel wrote: > From: Joerg Roedel > > Remove the use of dev->archdata.iommu and use the private per-device > pointer provided by IOMMU core code instead. > > Signed-off-by: Joerg Roedel Acked-by: Marek Szyprowski > --- > drivers/iommu/exynos-iommu.c | 20 +-- > .../media/platform/s5p-mfc/s5p_mfc_iommu.h| 4 +++- > 2 files changed, 13 insertions(+), 11 deletions(-) > > diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c > index 60c8a56e4a3f..6a9b67302369 100644 > --- a/drivers/iommu/exynos-iommu.c > +++ b/drivers/iommu/exynos-iommu.c > @@ -173,7 +173,7 @@ static u32 lv2ent_offset(sysmmu_iova_t iova) > #define REG_V5_FAULT_AR_VA 0x070 > #define REG_V5_FAULT_AW_VA 0x080 > > -#define has_sysmmu(dev) (dev->archdata.iommu != NULL) > +#define has_sysmmu(dev) (dev_iommu_priv_get(dev) != NULL) > > static struct device *dma_dev; > static struct kmem_cache *lv2table_kmem_cache; > @@ -226,7 +226,7 @@ static const struct sysmmu_fault_info sysmmu_v5_faults[] > = { > }; > > /* > - * This structure is attached to dev.archdata.iommu of the master device > + * This structure is attached to dev->iommu->priv of the master device >* on device add, contains a list of SYSMMU controllers defined by device > tree, >* which are bound to given master device. It is usually referenced by > 'owner' >* pointer. 
> @@ -670,7 +670,7 @@ static int __maybe_unused exynos_sysmmu_suspend(struct > device *dev) > struct device *master = data->master; > > if (master) { > - struct exynos_iommu_owner *owner = master->archdata.iommu; > + struct exynos_iommu_owner *owner = dev_iommu_priv_get(master); > > mutex_lock(&owner->rpm_lock); > if (data->domain) { > @@ -688,7 +688,7 @@ static int __maybe_unused exynos_sysmmu_resume(struct > device *dev) > struct device *master = data->master; > > if (master) { > - struct exynos_iommu_owner *owner = master->archdata.iommu; > + struct exynos_iommu_owner *owner = dev_iommu_priv_get(master); > > mutex_lock(&owner->rpm_lock); > if (data->domain) { > @@ -837,8 +837,8 @@ static void exynos_iommu_domain_free(struct iommu_domain > *iommu_domain) > static void exynos_iommu_detach_device(struct iommu_domain *iommu_domain, > struct device *dev) > { > - struct exynos_iommu_owner *owner = dev->archdata.iommu; > struct exynos_iommu_domain *domain = to_exynos_domain(iommu_domain); > + struct exynos_iommu_owner *owner = dev_iommu_priv_get(dev); > phys_addr_t pagetable = virt_to_phys(domain->pgtable); > struct sysmmu_drvdata *data, *next; > unsigned long flags; > @@ -875,8 +875,8 @@ static void exynos_iommu_detach_device(struct > iommu_domain *iommu_domain, > static int exynos_iommu_attach_device(struct iommu_domain *iommu_domain, > struct device *dev) > { > - struct exynos_iommu_owner *owner = dev->archdata.iommu; > struct exynos_iommu_domain *domain = to_exynos_domain(iommu_domain); > + struct exynos_iommu_owner *owner = dev_iommu_priv_get(dev); > struct sysmmu_drvdata *data; > phys_addr_t pagetable = virt_to_phys(domain->pgtable); > unsigned long flags; > @@ -1237,7 +1237,7 @@ static phys_addr_t exynos_iommu_iova_to_phys(struct > iommu_domain *iommu_domain, > > static struct iommu_device *exynos_iommu_probe_device(struct device *dev) > { > - struct exynos_iommu_owner *owner = dev->archdata.iommu; > + struct exynos_iommu_owner *owner = dev_iommu_priv_get(dev); 
> struct sysmmu_drvdata *data; > > if (!has_sysmmu(dev)) > @@ -1263,7 +1263,7 @@ static struct iommu_device > *exynos_iommu_probe_device(struct device *dev) > > static void exynos_iommu_release_device(struct device *dev) > { > - struct exynos_iommu_owner *owner = dev->archdata.iommu; > + struct exynos_iommu_owner *owner = dev_iommu_priv_get(dev); > struct sysmmu_drvdata *data; > > if (!has_sysmmu(dev)) > @@ -1287,8 +1287,8 @@ static void exynos_iommu_release_device(struct device > *dev) > static int exynos_iommu_of_xlate(struct device *dev, >struct of_phandle_args *spec) > { > - struct exynos_iommu_owner *owner = dev->archdata.iommu; > struct platform_device *sysmmu = of_find_device_by_node(spec->np); > + struct exynos_iommu_owner *owner = dev_iommu_priv_get(dev); > struct sysmmu_drvdata *data, *entry; > > if (!sysmmu) > @@ -1305,7 +1305,7 @@ static int exynos_iommu_of_xlate(struct device *dev, > > INIT_LIST_HEAD(&owner->controllers); > mutex_init(&owner->rpm_lock); > - dev->archdata.iommu = owner; > + dev_iommu_priv_set(dev, owner); > } > > l
[PATCH v5 05/10] iommu/mediatek: Move inv_sel_reg into the plat_data
For mt6779, MMU_INV_SEL register's offset is changed from 0x38 to 0x2c, so we can put inv_sel_reg in the plat_data to use it. In addition, we renamed it to REG_MMU_INV_SEL_GEN1 and use it before mt6779. Cc: Yong Wu Signed-off-by: Chao Hao Reviewed-by: Matthias Brugger --- drivers/iommu/mtk_iommu.c | 9 ++--- drivers/iommu/mtk_iommu.h | 1 + 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 67b46b5d83d9..116418cc1cf8 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -37,7 +37,7 @@ #define REG_MMU_INVLD_START_A 0x024 #define REG_MMU_INVLD_END_A0x028 -#define REG_MMU_INV_SEL0x038 +#define REG_MMU_INV_SEL_GEN1 0x038 #define F_INVLD_EN0BIT(0) #define F_INVLD_EN1BIT(1) @@ -168,7 +168,7 @@ static void mtk_iommu_tlb_flush_all(void *cookie) for_each_m4u(data) { writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0, - data->base + REG_MMU_INV_SEL); + data->base + data->plat_data->inv_sel_reg); writel_relaxed(F_ALL_INVLD, data->base + REG_MMU_INVALIDATE); wmb(); /* Make sure the tlb flush all done */ } @@ -185,7 +185,7 @@ static void mtk_iommu_tlb_flush_range_sync(unsigned long iova, size_t size, for_each_m4u(data) { spin_lock_irqsave(&data->tlb_lock, flags); writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0, - data->base + REG_MMU_INV_SEL); + data->base + data->plat_data->inv_sel_reg); writel_relaxed(iova, data->base + REG_MMU_INVLD_START_A); writel_relaxed(iova + size - 1, @@ -777,6 +777,7 @@ static const struct mtk_iommu_plat_data mt2712_data = { .flags= HAS_4GB_MODE | HAS_BCLK | HAS_VLD_PA_RNG, + .inv_sel_reg = REG_MMU_INV_SEL_GEN1, .larbid_remap = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, }; @@ -785,12 +786,14 @@ static const struct mtk_iommu_plat_data mt8173_data = { .flags= HAS_4GB_MODE | HAS_BCLK | RESET_AXI, + .inv_sel_reg = REG_MMU_INV_SEL_GEN1, .larbid_remap = {0, 1, 2, 3, 4, 5}, /* Linear mapping. 
*/ }; static const struct mtk_iommu_plat_data mt8183_data = { .m4u_plat = M4U_MT8183, .flags= RESET_AXI, + .inv_sel_reg = REG_MMU_INV_SEL_GEN1, .larbid_remap = {0, 4, 5, 6, 7, 2, 3, 1}, }; diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h index 4b780b651ef4..ae70296af2b4 100644 --- a/drivers/iommu/mtk_iommu.h +++ b/drivers/iommu/mtk_iommu.h @@ -47,6 +47,7 @@ enum mtk_iommu_plat { struct mtk_iommu_plat_data { enum mtk_iommu_plat m4u_plat; u32 flags; + u32 inv_sel_reg; unsigned char larbid_remap[MTK_LARB_NR_MAX]; }; -- 2.18.0 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v5 07/10] iommu/mediatek: Add REG_MMU_WR_LEN register definition
Some platforms(ex: mt6779) need to improve performance by setting REG_MMU_WR_LEN register. And we can use WR_THROT_EN macro to control whether we need to set the register. If the register uses default value, iommu will send command to EMI without restriction, when the number of commands become more and more, it will drop the EMI performance. So when more than ten_commands(default value) don't be handled for EMI, iommu will stop send command to EMI for keeping EMI's performace by enabling write throttling mechanism(bit[5][21]=0) in MMU_WR_LEN_CTRL register. Cc: Matthias Brugger Signed-off-by: Chao Hao --- drivers/iommu/mtk_iommu.c | 10 ++ drivers/iommu/mtk_iommu.h | 2 ++ 2 files changed, 12 insertions(+) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index ec1f86913739..92316c4175a9 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -46,6 +46,8 @@ #define F_MMU_STANDARD_AXI_MODE_BIT(BIT(3) | BIT(19)) #define REG_MMU_DCM_DIS0x050 +#define REG_MMU_WR_LEN 0x054 +#define F_MMU_WR_THROT_DIS_BIT (BIT(5) | BIT(21)) #define REG_MMU_CTRL_REG 0x110 #define F_MMU_TF_PROT_TO_PROGRAM_ADDR (2 << 4) @@ -582,6 +584,12 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data *data) writel_relaxed(regval, data->base + REG_MMU_VLD_PA_RNG); } writel_relaxed(0, data->base + REG_MMU_DCM_DIS); + if (MTK_IOMMU_HAS_FLAG(data->plat_data, WR_THROT_EN)) { + /* write command throttling mode */ + regval = readl_relaxed(data->base + REG_MMU_WR_LEN); + regval &= ~F_MMU_WR_THROT_DIS_BIT; + writel_relaxed(regval, data->base + REG_MMU_WR_LEN); + } regval = readl_relaxed(data->base + REG_MMU_MISC_CTRL); if (MTK_IOMMU_HAS_FLAG(data->plat_data, RESET_AXI)) { @@ -737,6 +745,7 @@ static int __maybe_unused mtk_iommu_suspend(struct device *dev) struct mtk_iommu_suspend_reg *reg = &data->reg; void __iomem *base = data->base; + reg->wr_len = readl_relaxed(base + REG_MMU_WR_LEN); reg->misc_ctrl = readl_relaxed(base + REG_MMU_MISC_CTRL); reg->dcm_dis = 
readl_relaxed(base + REG_MMU_DCM_DIS); reg->ctrl_reg = readl_relaxed(base + REG_MMU_CTRL_REG); @@ -761,6 +770,7 @@ static int __maybe_unused mtk_iommu_resume(struct device *dev) dev_err(data->dev, "Failed to enable clk(%d) in resume\n", ret); return ret; } + writel_relaxed(reg->wr_len, base + REG_MMU_WR_LEN); writel_relaxed(reg->misc_ctrl, base + REG_MMU_MISC_CTRL); writel_relaxed(reg->dcm_dis, base + REG_MMU_DCM_DIS); writel_relaxed(reg->ctrl_reg, base + REG_MMU_CTRL_REG); diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h index be6d32ee5bda..ce4f4e8f03aa 100644 --- a/drivers/iommu/mtk_iommu.h +++ b/drivers/iommu/mtk_iommu.h @@ -24,6 +24,7 @@ #define RESET_AXI BIT(3) #define OUT_ORDER_EN BIT(4) #define HAS_SUB_COMM BIT(5) +#define WR_THROT_ENBIT(6) #define MTK_IOMMU_HAS_FLAG(pdata, _x) \ pdata)->flags) & (_x)) == (_x)) @@ -36,6 +37,7 @@ struct mtk_iommu_suspend_reg { u32 int_main_control; u32 ivrp_paddr; u32 vld_pa_rng; + u32 wr_len; }; enum mtk_iommu_plat { -- 2.18.0 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v5 01/10] dt-bindings: mediatek: Add bindings for MT6779
This patch adds description for MT6779 IOMMU. MT6779 has two iommus, they are mm_iommu and apu_iommu which both use ARM Short-Descriptor translation format. In addition, mm_iommu and apu_iommu are two independent HW instances, so we need to set them separately. The MT6779 IOMMU hardware diagram is as below, it is only a brief diagram about iommu, it doesn't focus on the part of smi_larb, so I don't describe the smi_larb in detail. EMI | -- || MM_IOMMUAPU_IOMMU || SMI_COMMOM--- APU_BUS ||| SMI_LARB(0~11) || ||| || -- || | | | Multimedia engine CCU VPU MDLA EMDA All the connections are hardware fixed, software can not adjust it. Signed-off-by: Chao Hao Reviewed-by: Rob Herring --- .../bindings/iommu/mediatek,iommu.txt | 2 + include/dt-bindings/memory/mt6779-larb-port.h | 206 ++ 2 files changed, 208 insertions(+) create mode 100644 include/dt-bindings/memory/mt6779-larb-port.h diff --git a/Documentation/devicetree/bindings/iommu/mediatek,iommu.txt b/Documentation/devicetree/bindings/iommu/mediatek,iommu.txt index ce59a505f5a4..c1ccd8582eb2 100644 --- a/Documentation/devicetree/bindings/iommu/mediatek,iommu.txt +++ b/Documentation/devicetree/bindings/iommu/mediatek,iommu.txt @@ -58,6 +58,7 @@ Required properties: - compatible : must be one of the following string: "mediatek,mt2701-m4u" for mt2701 which uses generation one m4u HW. "mediatek,mt2712-m4u" for mt2712 which uses generation two m4u HW. + "mediatek,mt6779-m4u" for mt6779 which uses generation two m4u HW. "mediatek,mt7623-m4u", "mediatek,mt2701-m4u" for mt7623 which uses generation one m4u HW. "mediatek,mt8173-m4u" for mt8173 which uses generation two m4u HW. @@ -78,6 +79,7 @@ Required properties: Specifies the mtk_m4u_id as defined in dt-binding/memory/mt2701-larb-port.h for mt2701, mt7623 dt-binding/memory/mt2712-larb-port.h for mt2712, + dt-binding/memory/mt6779-larb-port.h for mt6779, dt-binding/memory/mt8173-larb-port.h for mt8173, and dt-binding/memory/mt8183-larb-port.h for mt8183. 
diff --git a/include/dt-bindings/memory/mt6779-larb-port.h b/include/dt-bindings/memory/mt6779-larb-port.h new file mode 100644 index ..2ad0899fbf2f --- /dev/null +++ b/include/dt-bindings/memory/mt6779-larb-port.h @@ -0,0 +1,206 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2019 MediaTek Inc. + * Author: Chao Hao + */ + +#ifndef _DTS_IOMMU_PORT_MT6779_H_ +#define _DTS_IOMMU_PORT_MT6779_H_ + +#define MTK_M4U_ID(larb, port) (((larb) << 5) | (port)) + +#define M4U_LARB0_ID0 +#define M4U_LARB1_ID1 +#define M4U_LARB2_ID2 +#define M4U_LARB3_ID3 +#define M4U_LARB4_ID4 +#define M4U_LARB5_ID5 +#define M4U_LARB6_ID6 +#define M4U_LARB7_ID7 +#define M4U_LARB8_ID8 +#define M4U_LARB9_ID9 +#define M4U_LARB10_ID 10 +#define M4U_LARB11_ID 11 + +/* larb0 */ +#define M4U_PORT_DISP_POSTMASK0 MTK_M4U_ID(M4U_LARB0_ID, 0) +#define M4U_PORT_DISP_OVL0_HDR MTK_M4U_ID(M4U_LARB0_ID, 1) +#define M4U_PORT_DISP_OVL1_HDR MTK_M4U_ID(M4U_LARB0_ID, 2) +#define M4U_PORT_DISP_OVL0 MTK_M4U_ID(M4U_LARB0_ID, 3) +#define M4U_PORT_DISP_OVL1 MTK_M4U_ID(M4U_LARB0_ID, 4) +#define M4U_PORT_DISP_PVRIC0MTK_M4U_ID(M4U_LARB0_ID, 5) +#define M4U_PORT_DISP_RDMA0 MTK_M4U_ID(M4U_LARB0_ID, 6) +#define M4U_PORT_DISP_WDMA0 MTK_M4U_ID(M4U_LARB0_ID, 7) +#define M4U_PORT_DISP_FAKE0 MTK_M4U_ID(M4U_LARB0_ID, 8) + +/* larb1 */ +#define M4U_PORT_DISP_OVL0_2L_HDR MTK_M4U_ID(M4U_LARB1_ID, 0) +#define M4U_PORT_DISP_OVL1_2L_HDR MTK_M4U_ID(M4U_LARB1_ID, 1) +#define M4U_PORT_DISP_OVL0_2L MTK_M4U_ID(M4U_LARB1_ID, 2) +#define M4U_PORT_DISP_OVL1_2L MTK_M4U_ID(M4U_LARB1_ID, 3) +#define M4U_PORT_DISP_RDMA1 MTK_M4U_ID(M4U_LARB1_ID, 4) +#define M4U_PORT_MDP_PVRIC0 MTK_M4U_ID(M4U_LARB1_ID, 5) +#define M4U_PORT_MDP_PVRIC1 MTK_M4U_ID(M4U_LARB1_ID, 6) +#define M4U_PORT_MDP_RDMA0 MTK_M4U_ID(M4U_LARB1_ID, 7) +#define M4U_PORT_MDP_RDMA1 MTK_M4U_ID(M4U_LARB1_ID, 8) +#define M4U_PORT_MDP_WROT0_R
[PATCH v5 09/10] iommu/mediatek: Modify MMU_CTRL register setting
MT8173 is different from other SoCs for MMU_CTRL register. For mt8173, its bit9 is in_order_write_en and doesn't use its default 1'b1. For other SoCs, bit[12] represents victim_tlb_en feature and victim_tlb is enabled by default (bit[12]=1), if we use "regval = F_MMU_TF_PROT_TO_PROGRAM_ADDR", victim_tlb will be disabled, and it will drop iommu performance. So we need to deal with the setting of MMU_CTRL separately for mt8173 and others. Suggested-by: Matthias Brugger Suggested-by: Yong Wu Signed-off-by: Chao Hao --- drivers/iommu/mtk_iommu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 8299a3299090..e46e2deee3fd 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -543,11 +543,12 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data *data) return ret; } + regval = readl_relaxed(data->base + REG_MMU_CTRL_REG); if (data->plat_data->m4u_plat == M4U_MT8173) regval = F_MMU_PREFETCH_RT_REPLACE_MOD | F_MMU_TF_PROT_TO_PROGRAM_ADDR_MT8173; else - regval = F_MMU_TF_PROT_TO_PROGRAM_ADDR; + regval |= F_MMU_TF_PROT_TO_PROGRAM_ADDR; writel_relaxed(regval, data->base + REG_MMU_CTRL_REG); regval = F_L2_MULIT_HIT_EN | -- 2.18.0 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v5 08/10] iommu/mediatek: Extend protect pa alignment value
Starting with mt6779, the iommu needs to extend the memory protection PA alignment from 128 bytes to 256 bytes, which is the max amount of data it can send. So we can use a separate patch to modify it. Suggested-by: Matthias Brugger Signed-off-by: Chao Hao --- drivers/iommu/mtk_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 92316c4175a9..8299a3299090 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -98,7 +98,7 @@ #define F_MMU_INT_ID_LARB_ID(a)(((a) >> 7) & 0x7) #define F_MMU_INT_ID_PORT_ID(a)(((a) >> 2) & 0x1f) -#define MTK_PROTECT_PA_ALIGN 128 +#define MTK_PROTECT_PA_ALIGN 256 /* * Get the local arbiter ID and the portid within the larb arbiter -- 2.18.0 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v5 06/10] iommu/mediatek: Add sub_comm id in translation fault
The max larb number that a iommu HW support is 8(larb0~larb7 in the below diagram). If the larb's number is over 8, we use a sub_common for merging several larbs into one larb. At this case, we will extend larb_id: bit[11:9] means common-id; bit[8:7] means subcommon-id; >From these two variables, we could get the real larb number when translation fault happen. The diagram is as below: EMI | IOMMU | - | | common1 common0 | | - | smi common | | | | | || 3'd03'd13'd23'd3 ... 3'd7 <-common_id(max is 8) | | | | || Larb0 Larb1 | Larb3 ... Larb7 | smi sub common | -- || | | 2'd0 2'd12'd22'd3 <-sub_common_id(max is 4) || | | Larb8Larb9 Larb10 Larb11 In this patch we extend larb_remap[] to larb_remap[8][4] for this. larb_remap[x][y]: x means common-id above, y means subcommon_id above. We can also distinguish if the M4U HW has sub_common by HAS_SUB_COMM macro. Cc: Matthias Brugger Signed-off-by: Chao Hao Reviewed-by: Yong Wu --- drivers/iommu/mtk_iommu.c | 20 +--- drivers/iommu/mtk_iommu.h | 3 ++- include/soc/mediatek/smi.h | 2 ++ 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 116418cc1cf8..ec1f86913739 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -91,6 +91,8 @@ #define REG_MMU1_INVLD_PA 0x148 #define REG_MMU0_INT_ID0x150 #define REG_MMU1_INT_ID0x154 +#define F_MMU_INT_ID_COMM_ID(a)(((a) >> 9) & 0x7) +#define F_MMU_INT_ID_SUB_COMM_ID(a)(((a) >> 7) & 0x3) #define F_MMU_INT_ID_LARB_ID(a)(((a) >> 7) & 0x7) #define F_MMU_INT_ID_PORT_ID(a)(((a) >> 2) & 0x1f) @@ -229,7 +231,7 @@ static irqreturn_t mtk_iommu_isr(int irq, void *dev_id) struct mtk_iommu_data *data = dev_id; struct mtk_iommu_domain *dom = data->m4u_dom; u32 int_state, regval, fault_iova, fault_pa; - unsigned int fault_larb, fault_port; + unsigned int fault_larb, fault_port, sub_comm = 0; bool layer, write; /* Read error info from registers */ @@ -245,10 +247,14 @@ static irqreturn_t mtk_iommu_isr(int irq, void 
*dev_id) } layer = fault_iova & F_MMU_FAULT_VA_LAYER_BIT; write = fault_iova & F_MMU_FAULT_VA_WRITE_BIT; - fault_larb = F_MMU_INT_ID_LARB_ID(regval); fault_port = F_MMU_INT_ID_PORT_ID(regval); - - fault_larb = data->plat_data->larbid_remap[fault_larb]; + if (MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_SUB_COMM)) { + fault_larb = F_MMU_INT_ID_COMM_ID(regval); + sub_comm = F_MMU_INT_ID_SUB_COMM_ID(regval); + } else { + fault_larb = F_MMU_INT_ID_LARB_ID(regval); + } + fault_larb = data->plat_data->larbid_remap[fault_larb][sub_comm]; if (report_iommu_fault(&dom->domain, data->dev, fault_iova, write ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ)) { @@ -778,7 +784,7 @@ static const struct mtk_iommu_plat_data mt2712_data = { HAS_BCLK | HAS_VLD_PA_RNG, .inv_sel_reg = REG_MMU_INV_SEL_GEN1, - .larbid_remap = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, + .larbid_remap = {{0}, {1}, {2}, {3}, {4}, {5}, {6}, {7}}, }; static const struct mtk_iommu_plat_data mt8173_data = { @@ -787,14 +793,14 @@ static const struct mtk_iommu_plat_data mt8173_data = { HAS_BCLK | RESET_AXI, .inv_sel_reg = REG_MMU_INV_SEL_GEN1, - .larbid_remap = {0, 1, 2, 3, 4, 5}, /* Linear mapping. */ + .larbid_remap = {{0}, {1}, {2}, {3}, {4}, {5}}, /* Linear mapping. */ }; static const struct mtk_iommu_plat_data mt8183_data = { .m4u_plat = M4U_MT8183, .flags= RESET_AXI, .inv_sel_reg = REG_MMU_INV_SEL_GEN1, - .larbid_remap = {0, 4, 5, 6, 7, 2, 3, 1}, + .larbid_remap = {{0}, {4}, {5}, {6}, {7}, {2}, {3}, {1}}, }; static const struct of_device_id mtk_iommu_of_ids[] = { diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h index ae70296af2b4..be6d32ee5bda 100644 --- a/drivers/iommu/mtk_iommu.h +++ b/drivers/iommu/mtk_iommu.h @@ -23,6 +23,7 @@ #define HAS_VLD_PA_RNG BIT(2) #define RESET_AXI BIT(3) #define OUT_ORDER_EN BIT(4) +#define HAS_SUB_COMM
[PATCH v5 03/10] iommu/mediatek: Modify the usage of mtk_iommu_plat_data structure
Given the fact that we are adding more and more plat_data bool values, it would make sense to use a u32 flags register and add the appropriate macro definitions to set and check for a flag present. No functional change. Suggested-by: Matthias Brugger Signed-off-by: Chao Hao --- drivers/iommu/mtk_iommu.c | 23 --- drivers/iommu/mtk_iommu.h | 16 ++-- 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 88d3df5b91c2..8f81df6cbe51 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -563,7 +563,8 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data *data) upper_32_bits(data->protect_base); writel_relaxed(regval, data->base + REG_MMU_IVRP_PADDR); - if (data->enable_4GB && data->plat_data->has_vld_pa_rng) { + if (data->enable_4GB && + MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_VLD_PA_RNG)) { /* * If 4GB mode is enabled, the validate PA range is from * 0x1__ to 0x1__. here record bit[32:30]. @@ -573,7 +574,7 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data *data) } writel_relaxed(0, data->base + REG_MMU_DCM_DIS); - if (data->plat_data->reset_axi) { + if (MTK_IOMMU_HAS_FLAG(data->plat_data, RESET_AXI)) { /* The register is called STANDARD_AXI_MODE in this case */ writel_relaxed(0, data->base + REG_MMU_MISC_CTRL); } @@ -618,7 +619,7 @@ static int mtk_iommu_probe(struct platform_device *pdev) /* Whether the current dram is over 4GB */ data->enable_4GB = !!(max_pfn > (BIT_ULL(32) >> PAGE_SHIFT)); - if (!data->plat_data->has_4gb_mode) + if (!MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_4GB_MODE)) data->enable_4GB = false; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -631,7 +632,7 @@ static int mtk_iommu_probe(struct platform_device *pdev) if (data->irq < 0) return data->irq; - if (data->plat_data->has_bclk) { + if (MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_BCLK)) { data->bclk = devm_clk_get(dev, "bclk"); if (IS_ERR(data->bclk)) return PTR_ERR(data->bclk); @@ -763,23 
+764,23 @@ static const struct dev_pm_ops mtk_iommu_pm_ops = { static const struct mtk_iommu_plat_data mt2712_data = { .m4u_plat = M4U_MT2712, - .has_4gb_mode = true, - .has_bclk = true, - .has_vld_pa_rng = true, + .flags= HAS_4GB_MODE | + HAS_BCLK | + HAS_VLD_PA_RNG, .larbid_remap = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, }; static const struct mtk_iommu_plat_data mt8173_data = { .m4u_plat = M4U_MT8173, - .has_4gb_mode = true, - .has_bclk = true, - .reset_axi= true, + .flags= HAS_4GB_MODE | + HAS_BCLK | + RESET_AXI, .larbid_remap = {0, 1, 2, 3, 4, 5}, /* Linear mapping. */ }; static const struct mtk_iommu_plat_data mt8183_data = { .m4u_plat = M4U_MT8183, - .reset_axi= true, + .flags= RESET_AXI, .larbid_remap = {0, 4, 5, 6, 7, 2, 3, 1}, }; diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h index 1b6ea839b92c..7cc39f729263 100644 --- a/drivers/iommu/mtk_iommu.h +++ b/drivers/iommu/mtk_iommu.h @@ -17,6 +17,15 @@ #include #include +#define HAS_4GB_MODE BIT(0) +/* HW will use the EMI clock if there isn't the "bclk". */ +#define HAS_BCLK BIT(1) +#define HAS_VLD_PA_RNG BIT(2) +#define RESET_AXI BIT(3) + +#define MTK_IOMMU_HAS_FLAG(pdata, _x) \ + pdata)->flags) & (_x)) == (_x)) + struct mtk_iommu_suspend_reg { u32 misc_ctrl; u32 dcm_dis; @@ -36,12 +45,7 @@ enum mtk_iommu_plat { struct mtk_iommu_plat_data { enum mtk_iommu_plat m4u_plat; - boolhas_4gb_mode; - - /* HW will use the EMI clock if there isn't the "bclk". */ - boolhas_bclk; - boolhas_vld_pa_rng; - boolreset_axi; + u32 flags; unsigned char larbid_remap[MTK_LARB_NR_MAX]; }; -- 2.18.0 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v5 00/10] MT6779 IOMMU SUPPORT
This patchset adds mt6779 iommu support. mt6779 has two iommus, they are MM_IOMMU(M4U) and APU_IOMMU which used ARM Short-Descriptor translation format. The mt6779's MM_IOMMU-SMI and APU_IOMMU HW diagram is as below, it is only a brief diagram: EMI | -- || MM_IOMMUAPU_IOMMU || SMI_COMMOM--- APU_BUS || | SMI_LARB(0~11) | | || | || -- || | | | Multimedia engineCCU VPU MDLA EMDA All the connections are hardware fixed, software can not adjust it. Compared with mt8183, SMI_BUS_ID width has changed from 10 to 12. SMI Larb number is described in bit[11:7], Port number is described in bit[6:2]. In addition, there are some registers has changed in mt6779, so we need to redefine and reuse them. The patchset only used MM_IOMMU, so we only add MM_IOMMU basic function, such as smi_larb port definition, registers definition and hardware initialization. change notes: v5: 1. Split "iommu/mediatek: Add mt6779 IOMMU basic support(patch v4)" to three patches(from PATCH v5 08/10 to PATCH v5 10/10). 2. Use macro definitions to replace bool values in mtk_iommu_plat_data structure v4: 1. Rebase on v5.8-rc1. 2. Fix coding style. 3. Add F_MMU_IN_DRDER_WR_EN definition in MISC_CTRL to improve performance. https://lkml.org/lkml/2020/6/16/1741 v3: 1. Rebase on v5.7-rc1. 2. Remove unused port definition,ex:APU and CCU port in mt6779-larb-port.h. 3. Remove "change single domain to multiple domain" part(from PATCH v2 09/19 to PATCH v2 19/19). 4. Redesign mt6779 basic part (1)Add some register definition and reuse them. (2)Redesign smi larb bus ID to analyze IOMMU translation fault. (3)Only init MM_IOMMU and not use APU_IOMMU. http://lists.infradead.org/pipermail/linux-mediatek/2020-May/029811.html v2: 1. Rebase on v5.5-rc1. 2. Delete M4U_PORT_UNKNOWN define because of not use it. 3. Correct coding format. 4. Rename offset=0x48 register. 5. Split "iommu/mediatek: Add mt6779 IOMMU basic support(patch v1)" to several patches(patch v2). 
http://lists.infradead.org/pipermail/linux-mediatek/2020-January/026131.html v1: http://lists.infradead.org/pipermail/linux-mediatek/2019-November/024567.html Chao Hao (10): dt-bindings: mediatek: Add bindings for MT6779 iommu/mediatek: Rename the register STANDARD_AXI_MODE(0x48) to MISC_CTRL iommu/mediatek: Modify the usage of mtk_iommu_plat_data structure iommu/mediatek: Setting MISC_CTRL register iommu/mediatek: Move inv_sel_reg into the plat_data iommu/mediatek: Add sub_comm id in translation fault iommu/mediatek: Add REG_MMU_WR_LEN register definition iommu/mediatek: Extend protect pa alignment value iommu/mediatek: Modify MMU_CTRL register setting iommu/mediatek: Add mt6779 basic support .../bindings/iommu/mediatek,iommu.txt | 2 + drivers/iommu/mtk_iommu.c | 100 ++--- drivers/iommu/mtk_iommu.h | 26 ++- include/dt-bindings/memory/mt6779-larb-port.h | 206 ++ include/soc/mediatek/smi.h| 2 + 5 files changed, 299 insertions(+), 37 deletions(-) -- 2.18.0 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v5 10/10] iommu/mediatek: Add mt6779 basic support
1. Start from mt6779, INVLDT_SEL move to offset=0x2c, so we add REG_MMU_INV_SEL_GEN2 definition and mt6779 uses it. 2. Add mt6779_data to support mm_iommu HW init. Cc: Yong Wu Cc: Matthias Brugger Signed-off-by: Chao Hao --- drivers/iommu/mtk_iommu.c | 11 +++ drivers/iommu/mtk_iommu.h | 1 + 2 files changed, 12 insertions(+) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index e46e2deee3fd..1575196d9cd5 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -37,6 +37,7 @@ #define REG_MMU_INVLD_START_A 0x024 #define REG_MMU_INVLD_END_A0x028 +#define REG_MMU_INV_SEL_GEN2 0x02c #define REG_MMU_INV_SEL_GEN1 0x038 #define F_INVLD_EN0BIT(0) #define F_INVLD_EN1BIT(1) @@ -798,6 +799,15 @@ static const struct mtk_iommu_plat_data mt2712_data = { .larbid_remap = {{0}, {1}, {2}, {3}, {4}, {5}, {6}, {7}}, }; +static const struct mtk_iommu_plat_data mt6779_data = { + .m4u_plat = M4U_MT6779, + .flags = HAS_SUB_COMM | +OUT_ORDER_EN | +WR_THROT_EN, + .inv_sel_reg = REG_MMU_INV_SEL_GEN2, + .larbid_remap = {{0}, {1}, {2}, {3}, {5}, {7, 8}, {10}, {9}}, +}; + static const struct mtk_iommu_plat_data mt8173_data = { .m4u_plat = M4U_MT8173, .flags= HAS_4GB_MODE | @@ -816,6 +826,7 @@ static const struct mtk_iommu_plat_data mt8183_data = { static const struct of_device_id mtk_iommu_of_ids[] = { { .compatible = "mediatek,mt2712-m4u", .data = &mt2712_data}, + { .compatible = "mediatek,mt6779-m4u", .data = &mt6779_data}, { .compatible = "mediatek,mt8173-m4u", .data = &mt8173_data}, { .compatible = "mediatek,mt8183-m4u", .data = &mt8183_data}, {} diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h index ce4f4e8f03aa..a080db2e8a93 100644 --- a/drivers/iommu/mtk_iommu.h +++ b/drivers/iommu/mtk_iommu.h @@ -43,6 +43,7 @@ struct mtk_iommu_suspend_reg { enum mtk_iommu_plat { M4U_MT2701, M4U_MT2712, + M4U_MT6779, M4U_MT8173, M4U_MT8183, }; -- 2.18.0 ___ iommu mailing list iommu@lists.linux-foundation.org 
https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v5 02/10] iommu/mediatek: Rename the register STANDARD_AXI_MODE(0x48) to MISC_CTRL
For iommu offset=0x48 register, only the previous mt8173/mt8183 use the name STANDARD_AXI_MODE, all the latest SoC extend the register more feature by different bits, for example: axi_mode, in_order_en, coherent_en and so on. So rename REG_MMU_MISC_CTRL may be more proper. This patch only rename the register name, no functional change. Signed-off-by: Chao Hao Reviewed-by: Yong Wu Reviewed-by: Matthias Brugger --- drivers/iommu/mtk_iommu.c | 14 +++--- drivers/iommu/mtk_iommu.h | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 2be96f1cdbd2..88d3df5b91c2 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -41,7 +41,7 @@ #define F_INVLD_EN0BIT(0) #define F_INVLD_EN1BIT(1) -#define REG_MMU_STANDARD_AXI_MODE 0x048 +#define REG_MMU_MISC_CTRL 0x048 #define REG_MMU_DCM_DIS0x050 #define REG_MMU_CTRL_REG 0x110 @@ -573,8 +573,10 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data *data) } writel_relaxed(0, data->base + REG_MMU_DCM_DIS); - if (data->plat_data->reset_axi) - writel_relaxed(0, data->base + REG_MMU_STANDARD_AXI_MODE); + if (data->plat_data->reset_axi) { + /* The register is called STANDARD_AXI_MODE in this case */ + writel_relaxed(0, data->base + REG_MMU_MISC_CTRL); + } if (devm_request_irq(data->dev, data->irq, mtk_iommu_isr, 0, dev_name(data->dev), (void *)data)) { @@ -718,8 +720,7 @@ static int __maybe_unused mtk_iommu_suspend(struct device *dev) struct mtk_iommu_suspend_reg *reg = &data->reg; void __iomem *base = data->base; - reg->standard_axi_mode = readl_relaxed(base + - REG_MMU_STANDARD_AXI_MODE); + reg->misc_ctrl = readl_relaxed(base + REG_MMU_MISC_CTRL); reg->dcm_dis = readl_relaxed(base + REG_MMU_DCM_DIS); reg->ctrl_reg = readl_relaxed(base + REG_MMU_CTRL_REG); reg->int_control0 = readl_relaxed(base + REG_MMU_INT_CONTROL0); @@ -743,8 +744,7 @@ static int __maybe_unused mtk_iommu_resume(struct device *dev) dev_err(data->dev, "Failed to enable 
clk(%d) in resume\n", ret); return ret; } - writel_relaxed(reg->standard_axi_mode, - base + REG_MMU_STANDARD_AXI_MODE); + writel_relaxed(reg->misc_ctrl, base + REG_MMU_MISC_CTRL); writel_relaxed(reg->dcm_dis, base + REG_MMU_DCM_DIS); writel_relaxed(reg->ctrl_reg, base + REG_MMU_CTRL_REG); writel_relaxed(reg->int_control0, base + REG_MMU_INT_CONTROL0); diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h index ea949a324e33..1b6ea839b92c 100644 --- a/drivers/iommu/mtk_iommu.h +++ b/drivers/iommu/mtk_iommu.h @@ -18,7 +18,7 @@ #include struct mtk_iommu_suspend_reg { - u32 standard_axi_mode; + u32 misc_ctrl; u32 dcm_dis; u32 ctrl_reg; u32 int_control0; -- 2.18.0 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v5 04/10] iommu/mediatek: Setting MISC_CTRL register
Add F_MMU_IN_ORDER_WR_EN and F_MMU_STANDARD_AXI_MODE_BIT definition in MISC_CTRL register. F_MMU_STANDARD_AXI_MODE_BIT: If we set F_MMU_STANDARD_AXI_MODE_BIT(bit[3][19] = 0, not follow standard AXI protocol), iommu will send urgent read command firstly compare with normal read command to improve performance. F_MMU_IN_ORDER_WR_EN: If we set F_MMU_IN_ORDER_WR_EN(bit[1][17] = 0, out-of-order write), iommu will re-order write command and send more higher priority write command instead of sending write command in order. The feature be controlled by OUT_ORDER_EN macro definition. Cc: Matthias Brugger Suggested-by: Yong Wu Signed-off-by: Chao Hao --- drivers/iommu/mtk_iommu.c | 12 +++- drivers/iommu/mtk_iommu.h | 1 + 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 8f81df6cbe51..67b46b5d83d9 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -42,6 +42,9 @@ #define F_INVLD_EN1BIT(1) #define REG_MMU_MISC_CTRL 0x048 +#define F_MMU_IN_ORDER_WR_EN (BIT(1) | BIT(17)) +#define F_MMU_STANDARD_AXI_MODE_BIT(BIT(3) | BIT(19)) + #define REG_MMU_DCM_DIS0x050 #define REG_MMU_CTRL_REG 0x110 @@ -574,10 +577,17 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data *data) } writel_relaxed(0, data->base + REG_MMU_DCM_DIS); + regval = readl_relaxed(data->base + REG_MMU_MISC_CTRL); if (MTK_IOMMU_HAS_FLAG(data->plat_data, RESET_AXI)) { /* The register is called STANDARD_AXI_MODE in this case */ - writel_relaxed(0, data->base + REG_MMU_MISC_CTRL); + regval = 0; + } else { + /* For mm_iommu, it can improve performance by the setting */ + regval &= ~F_MMU_STANDARD_AXI_MODE_BIT; + if (MTK_IOMMU_HAS_FLAG(data->plat_data, OUT_ORDER_EN)) + regval &= ~F_MMU_IN_ORDER_WR_EN; } + writel_relaxed(regval, data->base + REG_MMU_MISC_CTRL); if (devm_request_irq(data->dev, data->irq, mtk_iommu_isr, 0, dev_name(data->dev), (void *)data)) { diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h index 
7cc39f729263..4b780b651ef4 100644 --- a/drivers/iommu/mtk_iommu.h +++ b/drivers/iommu/mtk_iommu.h @@ -22,6 +22,7 @@ #define HAS_BCLK BIT(1) #define HAS_VLD_PA_RNG BIT(2) #define RESET_AXI BIT(3) +#define OUT_ORDER_EN BIT(4) #define MTK_IOMMU_HAS_FLAG(pdata, _x) \ pdata)->flags) & (_x)) == (_x)) -- 2.18.0 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu