[Patch Part1 v3 17/20] x86: irq_remapping: Use helpers to access irq_cfg data structure associated with IRQ
Use helpers to access irq_cfg data structure associated with IRQ, instead of accessing irq_data->chip_data directly. Later we could rewrite those helpers to support hierarchy irqdomain. Signed-off-by: Jiang Liu jiang@linux.intel.com --- drivers/iommu/irq_remapping.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index 74a1767c89b5..e9fbd68db96e 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -306,7 +306,7 @@ static int set_remapped_irq_affinity(struct irq_data *data, void free_remapped_irq(int irq) { - struct irq_cfg *cfg = irq_get_chip_data(irq); + struct irq_cfg *cfg = irq_cfg(irq); if (!remap_ops || !remap_ops->free_irq) return; @@ -319,7 +319,7 @@ void compose_remapped_msi_msg(struct pci_dev *pdev, unsigned int irq, unsigned int dest, struct msi_msg *msg, u8 hpet_id) { - struct irq_cfg *cfg = irq_get_chip_data(irq); + struct irq_cfg *cfg = irq_cfg(irq); if (!irq_remapped(cfg)) native_compose_msi_msg(pdev, irq, dest, msg, hpet_id); @@ -372,7 +372,7 @@ static void ir_ack_apic_edge(struct irq_data *data) static void ir_ack_apic_level(struct irq_data *data) { ack_APIC_irq(); - eoi_ioapic_irq(data->irq, data->chip_data); + eoi_ioapic_irq(data->irq, irqd_cfg(data)); } static void ir_print_prefix(struct irq_data *data, struct seq_file *p) -- 1.7.10.4 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[Patch Part1 v3 18/20] iommu/vt-d: Use helpers to access irq_cfg data structure associated with IRQ
Use helpers to access irq_cfg data structure associated with IRQ, instead of accessing irq_data->chip_data directly. Later we could rewrite those helpers to support hierarchy irqdomain. Signed-off-by: Jiang Liu jiang@linux.intel.com --- drivers/iommu/intel_irq_remapping.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index 7c80661b35c1..fd181cf8a589 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -55,7 +55,7 @@ static int __init parse_ioapics_under_ir(void); static struct irq_2_iommu *irq_2_iommu(unsigned int irq) { - struct irq_cfg *cfg = irq_get_chip_data(irq); + struct irq_cfg *cfg = irq_cfg(irq); return cfg ? cfg->irq_2_iommu : NULL; } @@ -86,7 +86,7 @@ static int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) { struct ir_table *table = iommu->ir_table; struct irq_2_iommu *irq_iommu = irq_2_iommu(irq); - struct irq_cfg *cfg = irq_get_chip_data(irq); + struct irq_cfg *cfg = irq_cfg(irq); unsigned int mask = 0; unsigned long flags; int index; @@ -154,7 +154,7 @@ static int map_irq_to_irte_handle(int irq, u16 *sub_handle) static int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle) { struct irq_2_iommu *irq_iommu = irq_2_iommu(irq); - struct irq_cfg *cfg = irq_get_chip_data(irq); + struct irq_cfg *cfg = irq_cfg(irq); unsigned long flags; if (!irq_iommu) @@ -1008,7 +1008,7 @@ static int intel_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) { - struct irq_cfg *cfg = data->chip_data; + struct irq_cfg *cfg = irqd_cfg(data); unsigned int dest, irq = data->irq; struct irte irte; int err; @@ -1063,7 +1063,7 @@ static void intel_compose_msi_msg(struct pci_dev *pdev, u16 sub_handle = 0; int ir_index; - cfg = irq_get_chip_data(irq); + cfg = irq_cfg(irq); ir_index = map_irq_to_irte_handle(irq, &sub_handle); BUG_ON(ir_index == -1); -- 1.7.10.4 ___ iommu mailing list 
iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[Patch Part1 v3 19/20] iommu/amd: Use helpers to access irq_cfg data structure associated with IRQ
Use helpers to access irq_cfg data structure associated with IRQ, instead of accessing irq_data->chip_data directly. Later we could rewrite those helpers to support hierarchy irqdomain. Signed-off-by: Jiang Liu jiang@linux.intel.com --- drivers/iommu/amd_iommu.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 505a9adac2d5..6fda7cc789eb 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -4068,7 +4068,7 @@ static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry, int devid; int ret; - cfg = irq_get_chip_data(irq); + cfg = irq_cfg(irq); if (!cfg) return -EINVAL; @@ -4131,7 +4131,7 @@ static int set_affinity(struct irq_data *data, const struct cpumask *mask, if (!config_enabled(CONFIG_SMP)) return -1; - cfg = data->chip_data; + cfg = irqd_cfg(data); irq = data->irq; irte_info = &cfg->irq_2_irte; @@ -4169,7 +4169,7 @@ static int free_irq(int irq) struct irq_2_irte *irte_info; struct irq_cfg *cfg; - cfg = irq_get_chip_data(irq); + cfg = irq_cfg(irq); if (!cfg) return -EINVAL; @@ -4188,7 +4188,7 @@ static void compose_msi_msg(struct pci_dev *pdev, struct irq_cfg *cfg; union irte irte; - cfg = irq_get_chip_data(irq); + cfg = irq_cfg(irq); if (!cfg) return; @@ -4217,7 +4217,7 @@ static int msi_alloc_irq(struct pci_dev *pdev, int irq, int nvec) if (!pdev) return -EINVAL; - cfg = irq_get_chip_data(irq); + cfg = irq_cfg(irq); if (!cfg) return -EINVAL; @@ -4237,7 +4237,7 @@ static int msi_setup_irq(struct pci_dev *pdev, unsigned int irq, if (!pdev) return -EINVAL; - cfg = irq_get_chip_data(irq); + cfg = irq_cfg(irq); if (!cfg) return -EINVAL; @@ -4260,7 +4260,7 @@ static int alloc_hpet_msi(unsigned int irq, unsigned int id) struct irq_cfg *cfg; int index, devid; - cfg = irq_get_chip_data(irq); + cfg = irq_cfg(irq); if (!cfg) return -EINVAL; -- 1.7.10.4 ___ iommu mailing list iommu@lists.linux-foundation.org 
https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v5 1/3] iommu/rockchip: rk3288 iommu driver
On Mon, Oct 27, 2014 at 4:32 AM, Heiko Stübner he...@sntech.de wrote: Hi Daniel, Am Freitag, 24. Oktober 2014, 15:33:47 schrieb Daniel Kurtz: [...] +static int rk_iommu_attach_device(struct iommu_domain *domain, + struct device *dev) +{ + struct rk_iommu *iommu = dev_get_drvdata(dev-archdata.iommu); Here I get a null-ptr dereference [0] when using the iommu driver with the pending drm changes. That's what I get for testing against a heavily modified v3.14-based kernel... In v3.14, dev_get_drvdata() would happily return NULL if dev=NULL. This feature was removed in v3.15 by this patch: commit d4332013919aa87dbdede67d677e4cf2cd32e898 Author: Jean Delvare jdelv...@suse.de Date: Mon Apr 14 12:57:43 2014 +0200 driver core: dev_get_drvdata: Don't check for NULL dev + struct rk_iommu_domain *rk_domain = domain-priv; + unsigned long flags; + int ret; + phys_addr_t dte_addr; + + /* + * Allow 'virtual devices' (e.g., drm) to attach to domain. + * Such a device has a NULL archdata.iommu. + */ + if (!iommu) When the comment is correct, the code should probably do something like the following? if (!dev-archdata.iommu) return 0; iommu = dev_get_drvdata(dev-archdata.iommu); Yes, that looks reasonable. 
+ return 0; + + ret = rk_iommu_enable_stall(iommu); + if (ret) + return ret; + + ret = rk_iommu_force_reset(iommu); + if (ret) + return ret; + + iommu-domain = domain; + + ret = devm_request_irq(dev, iommu-irq, rk_iommu_irq, +IRQF_SHARED, dev_name(dev), iommu); + if (ret) + return ret; + + dte_addr = virt_to_phys(rk_domain-dt); + rk_iommu_write(iommu, RK_MMU_DTE_ADDR, dte_addr); + rk_iommu_command(iommu, RK_MMU_CMD_ZAP_CACHE); + rk_iommu_write(iommu, RK_MMU_INT_MASK, RK_MMU_IRQ_MASK); + + ret = rk_iommu_enable_paging(iommu); + if (ret) + return ret; + + spin_lock_irqsave(rk_domain-iommus_lock, flags); + list_add_tail(iommu-node, rk_domain-iommus); + spin_unlock_irqrestore(rk_domain-iommus_lock, flags); + + dev_info(dev, Attached to iommu domain\n); + + rk_iommu_disable_stall(iommu); + + return 0; +} [...] + +static struct platform_driver rk_iommu_driver = { + .probe = rk_iommu_probe, + .remove = rk_iommu_remove, + .driver = { +.name = rk_iommu, +.owner = THIS_MODULE, +.of_match_table = of_match_ptr(rk_iommu_dt_ids), + }, +}; + +static int __init rk_iommu_init(void) +{ + int ret; + + ret = bus_set_iommu(platform_bus_type, rk_iommu_ops); on 3.18-rc1 this fails with -ENODEV, as add_iommu_group() is missing the add_device callback in rk_iommu_ops, so the iommu driver actually never gets registered. v3.18-rc1 has patch [0] which changes bus_set_iommu()-iommu_bus_init() to propagate the return value of add_iommu_group(), whereas it was ignored in v3.17. [0] commit fb3e306515ba6a012364b698b8ca71c337424ed3 Author: Mark Salter msal...@redhat.com Date: Sun Sep 21 13:58:24 2014 -0400 iommu: Fix bus notifier breakage This patch made it mandatory that iommu drivers provide an add_group callback. I'm not exactly sure why. Iommu groups do not seem to be a good fit for the rockchip iommus, since the iommus are all 1:1 with their master device. 
The exynos add_group() is a possibility, however, it causes an iommu_group to be allocated for every single platform_device, even if they do not use an iommu. This seems very wasteful. Instead we can check the device's dt node for an iommus field to a phandle with a #iommu-cells field. Also, perhaps the add_device() is a good place to stick other generic device initialization code, which we are currently sprinkling in the drivers of rockchip iommu masters (drm/codec). Other drivers do this: * shmobile: sets up the iommu mapping with arm_iommu_create_mapping() / arm_iommu_attach_device() * omap: use of_parse_phandle()/of_find_device_by_node() to set a master device's dev-archdata.iommu. Or, perhaps we can just ignore iommu groups entirely and use dummy functions: static int rk_iommu_add_device(struct device *dev) { return 0; } static void rk_iommu_remove_device(struct device *dev) { } I'll investigate more. -Dan I've stolen the generic add_device and remove_device callbacks from the exynos iommu driver which makes the rk one at least probe. Can't say how far it goes, as I'm still struggling with the floating display subsystem parts. My current diff against this version can be found in [1]. Maybe the issue I had in attach_device also simply resulted from this one, not sure right now. Heiko + if (ret) + return ret; + + return platform_driver_register(rk_iommu_driver); +} +static void __exit rk_iommu_exit(void) +{ +
Re: [PATCH 0/5] iommu/vt-d: Fix crash dump failure caused by legacy DMA/IO
On 10/27/14 at 03:29pm, Li, ZhenHua wrote: Hi Baoquan, I failed in testing this patchset for 3.18.0-rc1, this upstream 3.18.0-rc1 kernel cannot boot on my system, have not found out the reason. Could you please test this patchset on 3.17.0 to see whether it has these faults? Thanks Zhenhua Failed too on 3.17.0, check the log as below: [0.103751] Mount-cache hash table entries: 512 (order: 0, 4096 bytes) [0.110285] Mountpoint-cache hash table entries: 512 (order: 0, 4096 bytes) [0.117549] Initializing cgroup subsys memory [0.121917] Initializing cgroup subsys devices [0.126367] Initializing cgroup subsys freezer [0.130817] Initializing cgroup subsys net_cls [0.135265] Initializing cgroup subsys blkio [0.139545] Initializing cgroup subsys perf_event [0.144254] Initializing cgroup subsys hugetlb [0.148741] CPU: Physical Processor ID: 0 [0.152751] CPU: Processor Core ID: 1 [0.156427] Last level iTLB entries: 4KB 512, 2MB 8, 4MB 8 [0.156427] Last level dTLB entries: 4KB 512, 2MB 32, 4MB 32, 1GB 0 [0.180040] Freeing SMP alternatives memory: 24K (ade7a000 - ade8) [0.190787] ftrace: allocating 26881 entries in 106 pages [0.222955] dmar: Host address width 46 [0.226796] dmar: DRHD base: 0x00dfffc000 flags: 0x1 [0.232128] dmar: IOMMU 0: reg_base_addr dfffc000 ver 1:0 cap d2078c106f0462 ecap f020fe [0.240223] dmar: RMRR base: 0x00cba11000 end: 0x00cba27fff [0.246495] dmar: ATSR flags: 0x0 [0.249921] IOAPIC id 0 under DRHD base 0xdfffc000 IOMMU 0 [0.255499] IOAPIC id 2 under DRHD base 0xdfffc000 IOMMU 0 [0.261076] HPET id 0 under DRHD base 0xdfffc000 [0.265899] Enabled IRQ remapping in xapic mode [0.271030] ..TIMER: vector=0x30 apic1=0 pin1=2 apic2=-1 pin2=-1 [0.287077] smpboot: CPU0: Intel(R) Xeon(R) CPU E5-1603 0 @ 2.80GHz (fam: 06, model: 2d, stepping: 07) [0.296535] Performance Events: PEBS fmt1+, 16-deep LBR, SandyBridge events, full-width counters, Broken BIOS detected, complain to your hardware vendor. 
[0.310427] [Firmware Bug]: the BIOS has corrupted hw-PMU resources (MSR 38d is b0) [0.318087] Intel PMU driver. [0.321065] ... version:3 [0.325077] ... bit width: 48 [0.329180] ... generic registers: 8 [0.333198] ... value mask: [0.338516] ... max period: [0.343834] ... fixed-purpose events: 3 [0.347848] ... event mask: 000700ff [0.355607] x86: Booted up 1 node, 1 CPUs [0.359627] smpboot: Total of 1 processors activated (5586.06 BogoMIPS) [0.366281] NMI watchdog: enabled on all CPUs, permanently consumes one hw-PMU counter. [0.377496] devtmpfs: initialized [0.386629] PM: Registering ACPI NVS region [mem 0xcb75-0xcb7dafff] (569344 bytes) [0.394583] PM: Registering ACPI NVS region [mem 0xcbaad000-0xcbaaefff] (8192 bytes) [0.402337] PM: Registering ACPI NVS region [mem 0xcbabb000-0xcbacdfff] (77824 bytes) [0.410169] PM: Registering ACPI NVS region [mem 0xcbb56000-0xcbb5dfff] (32768 bytes) [0.418005] PM: Registering ACPI NVS region [mem 0xcbb71000-0xcbff] (4780032 bytes) [0.427905] atomic64_test: passed for x86-64 platform with CX8 and with SSE [0.434883] pinctrl core: initialized pinctrl subsystem [0.440171] RTC time: 10:38:17, date: 10/27/14 [0.444783] NET: Registered protocol family 16 [0.449652] cpuidle: using governor menu [0.453820] ACPI: bus type PCI registered [0.457841] acpiphp: ACPI Hot Plug PCI Controller Driver version: 0.5 [0.464406] PCI: MMCONFIG for domain [bus 00-ff] at [mem 0xe000-0xefff] (base 0xe000) [0.473718] PCI: MMCONFIG at [mem 0xe000-0xefff] reserved in E820 [0.481119] PCI: Using configuration type 1 for base access [0.489116] ACPI: Added _OSI(Module Device) [0.493313] ACPI: Added _OSI(Processor Device) [0.497768] ACPI: Added _OSI(3.0 _SCP Extensions) [0.502477] ACPI: Added _OSI(Processor Aggregator Device) [0.521054] ACPI: Executed 1 blocks of module-level executable AML code [0.653647] ACPI: Interpreter enabled [0.657334] ACPI Exception: AE_NOT_FOUND, While evaluating Sleep State [\_S1_] (20140724/hwxface-580) [0.10] ACPI Exception: 
AE_NOT_FOUND, While evaluating Sleep State [\_S2_] (20140724/hwxface-580) [0.675902] ACPI: (supports S0 S3 S4 S5) [0.679833] ACPI: Using IOAPIC for interrupt routing [0.684858] PCI: Using host bridge windows from ACPI; if necessary, use pci=nocrs and report a bug [0.695495] [Firmware Bug]: ACPI: BIOS _OSI(Linux) query ignored [0.717663] ACPI: PCI Root Bridge [PCI0] (domain [bus 00-7f]) [0.723860] acpi PNP0A08:00: _OSC: OS supports [ExtendedConfig ASPM ClockPM Segments MSI] [0.732282] acpi PNP0A08:00: _OSC: platform does not support [PCIeCapability] [0.739533] acpi PNP0A08:00: _OSC:
Re: [RFC PATCH v3 5/7] dma-mapping: detect and configure IOMMU in of_dma_configure
On Tue, Oct 14, 2014 at 01:53:59PM +0100, Laurent Pinchart wrote: Hi Will, Hi Laurent, On Monday 22 September 2014 18:50:27 Will Deacon wrote: On Mon, Sep 22, 2014 at 10:29:10AM +0100, Thierry Reding wrote: Agreed. I wonder how useful it is to know the set of IOMMU instances that each device can master through. Wouldn't it be more useful to keep a list of master interfaces for each device? The set of IOMMU instances can trivially be derived from that. I'm struggling to think how that would look. What do you mean by `master interfaces' in terms of the code we have in Linux? At the end of the day, the list of IOMMU instances (i.e. iommu_dma_mapping) exists because you and Laurent have use-cases involving devices mastering through multiple IOMMUs. If it doesn't work for you, it might be best for you to send me the patch ;) Just for the record, I've brought up the topic of masters being served by multiple IOMMUs, but don't have a use case for it (yet at least). I do have masters served through multiple streams with separate stream IDs, but all by the same IOMMU. Ok. I spoke to Arnd, David and Joerg at LPC and the consensus was that the DMA-mapping API should *not* be exposed to the details of masters that master through multiple IOMMUs. Instead, that should be abstracted by the device API by exposing that device as a single struct device. So, that's certainly an area that needs more work and I'll drop the limited support I'd cooked up from this patch set in the next version. Will ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [RFC PATCH v3 5/7] dma-mapping: detect and configure IOMMU in of_dma_configure
Hello, On 2014-10-27 11:51, Will Deacon wrote: On Tue, Oct 14, 2014 at 01:53:59PM +0100, Laurent Pinchart wrote: On Monday 22 September 2014 18:50:27 Will Deacon wrote: On Mon, Sep 22, 2014 at 10:29:10AM +0100, Thierry Reding wrote: Agreed. I wonder how useful it is to know the set of IOMMU instances that each device can master through. Wouldn't it be more useful to keep a list of master interfaces for each device? The set of IOMMU instances can trivially be derived from that. I'm struggling to think how that would look. What do you mean by `master interfaces' in terms of the code we have in Linux? At the end of the day, the list of IOMMU instances (i.e. iommu_dma_mapping) exists because you and Laurent have use-cases involving devices mastering through multiple IOMMUs. If it doesn't work for you, it might be best for you to send me the patch ;) Just for the record, I've brought up the topic of masters being served by multiple IOMMUs, but don't have a use case for it (yet at least). I do have masters served through multiple streams with separate stream IDs, but all by the same IOMMU. Ok. I spoke to Arnd, David and Joerg at LPC and the consensus was that the DMA-mapping API should *not* be exposed to the details of masters that master through multiple IOMMUs. Instead, that should be abstracted by the device API by exposing that device as a single struct device. So, that's certainly an area that needs more work and I'll drop the limited support I'd cooked up from this patch set in the next version. Great! That's more or less something I've already implemented on top of your previous patchset, as I didn't have any good idea how to manage multiple masters separately. I'm waiting for your next update and I will rebase my patches soon. Best regards -- Marek Szyprowski, PhD Samsung RD Institute Poland ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v6 2/2] iommu/arm-smmu: add support for iova_to_phys through ATS1PR
On Tue, Oct 14, 2014 at 10:53:30PM +0100, Mitchel Humpherys wrote: Currently, we provide the iommu_ops.iova_to_phys service by doing a table walk in software to translate IO virtual addresses to physical addresses. On SMMUs that support it, it can be useful to ask the SMMU itself to do the translation. This can be used to warm the TLBs for an SMMU. It can also be useful for testing and hardware validation. Since the address translation registers are optional on SMMUv2, only enable hardware translations when using SMMUv1 or when SMMU_IDR0.S1TS=1 and SMMU_IDR0.ATOSNS=0, as described in the ARM SMMU v1-v2 spec. [...] -static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain, +static phys_addr_t arm_smmu_iova_to_phys_soft(struct iommu_domain *domain, dma_addr_t iova) { pgd_t *pgdp, pgd; @@ -1557,6 +1569,66 @@ static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain, return __pfn_to_phys(pte_pfn(pte)) | (iova ~PAGE_MASK); } +static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, + dma_addr_t iova) +{ + struct arm_smmu_domain *smmu_domain = domain-priv; + struct arm_smmu_device *smmu = smmu_domain-smmu; + struct arm_smmu_cfg *cfg = smmu_domain-cfg; + struct device *dev = smmu-dev; + void __iomem *cb_base; + u32 tmp; + u64 phys; + unsigned long flags; + + cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg-cbndx); + + spin_lock_irqsave(smmu_domain-lock, flags); + + if (smmu-version == 1) { + u32 reg = iova ~0xfff; + writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_LO); + } else { + u32 reg = iova ~0xfff; + writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_LO); + reg = (iova ~0xfff) 32; + writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_HI); + } + + if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp, + !(tmp ATSR_ACTIVE), 5, 50)) { + dev_err(dev, + iova to phys timed out on 0x%pa. Falling back to software table walk.\n, + iova); + return arm_smmu_iova_to_phys_soft(domain, iova); Missing unlock here. 
+ } + + phys = readl_relaxed(cb_base + ARM_SMMU_CB_PAR_LO); + phys |= ((u64) readl_relaxed(cb_base + ARM_SMMU_CB_PAR_HI)) 32; + + spin_unlock_irqrestore(smmu_domain-lock, flags); + + if (phys CB_PAR_F) { + dev_err(dev, translation fault!\n); + dev_err(dev, PAR = 0x%llx\n, phys); + phys = 0; + } else { + phys = (phys 0xfff000ULL) | (iova 0x0fff); + } I think your mask is too big here -- SMMUv2 puts the MAIR in the top byte. It probably makes more sense to use PHYS_MASK ~0xfffUL. Will ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH] arm: dma-mapping: fix compilation error when CONFIG_MMU is not present
Hello, On 2014-10-24 15:15, Stefan Hengelein wrote: Hello, adding a dependency to CONFIG_ARM_DMA_USE_IOMMU is probably not a good idea anyways. If you read the Documentation/kbuild/kconfig-language.txt, you will see that: 1. in general, selected options should have no prompt and no dependencies 2. giving ARM_DMA_USE_IOMMU a dependency on MMU will and cannot solve the problem, since ARM_DMA_USE_IOMMU itself is selected by several sources and select will not enforce dependencies to be resolved (to be precise, it will simply ignore them). Thus, CONFIG_MMU will not be enabled (as you can see in your warning). If you want to ensure CONFIG_MMU is enabled when CONFIG_ARM_DMA_USE_IOMMU is selected, the selecting options (e.g. SHMOBILE_IOMMU) should have a dependency on MMU. However, i'm not sure if that's the right way to solve it, since it will hide these options when MMU is not enabled. Making the whole IOMMU subsystem depend on MMU would probably also solve the problem. Is there any situation where it would make sense to use an IOMMU without an MMU? IMHO such configuration doesn't make sense and hiding IOMMU section completely when non-MMU systems are enabled is the right approach. Best regards -- Marek Szyprowski, PhD Samsung RD Institute Poland ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v6 1/3] iommu/rockchip: rk3288 iommu driver
The rk3288 has several iommus. Each iommu belongs to a single master device. There is one device (ISP) that has two slave iommus, but that case is not yet supported by this driver. At subsys init, the iommu driver registers itself as the iommu driver for the platform bus. The master devices find their slave iommus using the iommus field in their devicetree description. Since each slave iommu belongs to exactly one master, their is no additional data needed at probe to associate a slave with its master. An iommu device's power domain, clock and irq are all shared with its master device, and the master device must be careful to attach from the iommu only after powering and clocking it (and leave it powered and clocked before detaching). Because their is no guarantee what the status of the iommu is at probe, and since the driver does not even know if the device is powered, we delay requesting its irq until the master device attaches, at which point we have a guarantee that the device is powered and clocked and we can reset it and disable its interrupt mask. An iommu_domain describes a virtual iova address space. Each iommu_domain has a corresponding page table that lists the mappings from iova to physical address. For the rk3288 iommu, the page table has two levels: The Level 1 directory_table has 1024 4-byte dte entries. Each dte points to a level 2 page_table. Each level 2 page_table has 1024 4-byte pte entries. Each pte points to a 4 KiB page of memory. An iommu_domain is created when a dma_iommu_mapping is created via arm_iommu_create_mapping. Master devices can then attach themselves to this mapping (or attach the mapping to themselves?) by calling arm_iommu_attach_device(). This in turn instructs the iommu driver to write the page table's physical address into the slave iommu's Directory Table Entry (DTE) register. In fact multiple master devices, each with their own slave iommu device, can all attach to the same mapping. 
The iommus for these devices will share the same iommu_domain and therefore point to the same page table. Thus, the iommu domain maintains a list of iommu devices which are attached. This driver relies on the iommu core to ensure that all devices have detached before destroying a domain. Changes in v6: - add .add/remove_device() callbacks. - parse platform_device device tree nodes for iommus property - store platform device pointer as group iommudata - Check for existence of iommu group instead of relying on a dev_get_drvdata() to return NULL for a NULL device. Signed-off-by: Daniel Kurtz djku...@chromium.org Signed-off-by: Simon Xue x...@rock-chips.com Reviewed-by: Grant Grundler grund...@chromium.org Reviewed-by: Stéphane Marchesin marc...@chromium.org --- drivers/iommu/Kconfig | 12 + drivers/iommu/Makefile |1 + drivers/iommu/rockchip-iommu.c | 1038 3 files changed, 1051 insertions(+) create mode 100644 drivers/iommu/rockchip-iommu.c diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index dd51122..d0a1261 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -152,6 +152,18 @@ config OMAP_IOMMU_DEBUG Say N unless you know you need this. +config ROCKCHIP_IOMMU + bool Rockchip IOMMU Support + depends on ARCH_ROCKCHIP + select IOMMU_API + select ARM_DMA_USE_IOMMU + help + Support for IOMMUs found on Rockchip rk32xx SOCs. + These IOMMUs allow virtualization of the address space used by most + cores within the multimedia subsystem. + Say Y here if you are using a Rockchip SoC that includes an IOMMU + device. 
+ config TEGRA_IOMMU_GART bool Tegra GART IOMMU Support depends on ARCH_TEGRA_2x_SOC diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index 16edef7..3e47ef3 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -13,6 +13,7 @@ obj-$(CONFIG_IRQ_REMAP) += intel_irq_remapping.o irq_remapping.o obj-$(CONFIG_OMAP_IOMMU) += omap-iommu.o obj-$(CONFIG_OMAP_IOMMU) += omap-iommu2.o obj-$(CONFIG_OMAP_IOMMU_DEBUG) += omap-iommu-debug.o +obj-$(CONFIG_ROCKCHIP_IOMMU) += rockchip-iommu.o obj-$(CONFIG_TEGRA_IOMMU_GART) += tegra-gart.o obj-$(CONFIG_TEGRA_IOMMU_SMMU) += tegra-smmu.o obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c new file mode 100644 index 000..61d6f87 --- /dev/null +++ b/drivers/iommu/rockchip-iommu.c @@ -0,0 +1,1038 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include asm/cacheflush.h +#include asm/pgtable.h +#include linux/compiler.h +#include linux/delay.h +#include linux/device.h +#include linux/errno.h +#include linux/interrupt.h +#include linux/io.h +#include linux/iommu.h +#include
[PATCH 11/16] s390/MSI: Use MSI controller framework to configure MSI/MSI-X irq
Use MSI controller framework instead of arch MSI functions to configure MSI/MSI-X irq. So we can manage MSI/MSI-X irq in a unified framework. Signed-off-by: Yijing Wang wangyij...@huawei.com Acked-by: Sebastian Ott seb...@linux.vnet.ibm.com --- arch/s390/include/asm/pci.h |1 + arch/s390/pci/pci.c | 19 +-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index c030900..bf14da2 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -88,6 +88,7 @@ struct zpci_dev { u32 uid;/* user defined id */ u8 util_str[CLP_UTIL_STR_LEN]; /* utility string */ + struct msi_controller *msi_ctrl; /* IRQ stuff */ u64 msi_addr; /* MSI address */ struct airq_iv *aibv; /* adapter interrupt bit vector */ diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 552b990..beed5ab 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -358,7 +358,15 @@ static void zpci_irq_handler(struct airq_struct *airq) } } -int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) +struct msi_controller *pcibios_msi_controller(struct pci_bus *bus) +{ + struct zpci_dev *zpci = bus-sysdata; + + return zpci-msi_ctrl; +} + +static int zpci_setup_msi_irqs(struct msi_controller *ctrl, + struct pci_dev *pdev, int nvec, int type) { struct zpci_dev *zdev = get_zdev(pdev); unsigned int hwirq, msi_vecs; @@ -434,7 +442,8 @@ out: return rc; } -void arch_teardown_msi_irqs(struct pci_dev *pdev) +static void zpci_teardown_msi_irqs(struct msi_controller *ctrl, + struct pci_dev *pdev) { struct zpci_dev *zdev = get_zdev(pdev); struct msi_desc *msi; @@ -464,6 +473,11 @@ void arch_teardown_msi_irqs(struct pci_dev *pdev) airq_iv_free_bit(zpci_aisb_iv, zdev-aisb); } +static struct msi_controller zpci_msi_ctrl = { + .setup_irqs = zpci_setup_msi_irqs, + .teardown_irqs = zpci_teardown_msi_irqs, +}; + static void zpci_map_resources(struct zpci_dev *zdev) { struct pci_dev *pdev = zdev-pdev; @@ -749,6 +763,7 @@ static int 
zpci_scan_bus(struct zpci_dev *zdev) if (ret) return ret; + zdev-msi_ctrl = zpci_msi_ctrl; zdev-bus = pci_scan_root_bus(NULL, ZPCI_BUS_NR, pci_root_ops, zdev, resources); if (!zdev-bus) { -- 1.7.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 13/16] IA64/MSI: Use MSI controller framework to configure MSI/MSI-X irq
Use MSI controller framework instead of arch MSI functions to configure MSI/MSI-X irq. So we can manage MSI/MSI-X irq in a unified framework. Signed-off-by: Yijing Wang wangyij...@huawei.com --- arch/ia64/include/asm/pci.h |3 ++- arch/ia64/kernel/msi_ia64.c | 24 ++-- arch/ia64/pci/pci.c |1 + 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/arch/ia64/include/asm/pci.h b/arch/ia64/include/asm/pci.h index 52af5ed..805bbc3 100644 --- a/arch/ia64/include/asm/pci.h +++ b/arch/ia64/include/asm/pci.h @@ -93,7 +93,7 @@ struct pci_controller { void *iommu; int segment; int node; /* nearest node with memory or NUMA_NO_NODE for global allocation */ - + struct msi_controller *msi_ctrl; void *platform_data; }; @@ -101,6 +101,7 @@ struct pci_controller { #define PCI_CONTROLLER(busdev) ((struct pci_controller *) busdev-sysdata) #define pci_domain_nr(busdev)(PCI_CONTROLLER(busdev)-segment) +extern struct msi_controller ia64_msi_ctrl; extern struct pci_ops pci_root_ops; static inline int pci_proc_domain(struct pci_bus *bus) diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c index 8c3730c..b92b8e2 100644 --- a/arch/ia64/kernel/msi_ia64.c +++ b/arch/ia64/kernel/msi_ia64.c @@ -42,7 +42,7 @@ static int ia64_set_msi_irq_affinity(struct irq_data *idata, } #endif /* CONFIG_SMP */ -int ia64_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc) +int __ia64_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc) { struct msi_msg msg; unsigned long dest_phys_id; @@ -77,7 +77,7 @@ int ia64_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc) return 0; } -void ia64_teardown_msi_irq(unsigned int irq) +void __ia64_teardown_msi_irq(unsigned int irq) { destroy_irq(irq); } @@ -111,23 +111,35 @@ static struct irq_chip ia64_msi_chip = { .irq_retrigger = ia64_msi_retrigger_irq, }; +struct msi_controller *pcibios_msi_controller(struct pci_bus *bus) +{ + struct pci_controller *ctrl = bus-sysdata; + + return ctrl-msi_ctrl; +} -int arch_setup_msi_irq(struct 
pci_dev *pdev, struct msi_desc *desc) +static int ia64_setup_msi_irq(struct msi_controller *ctrl, + struct pci_dev *pdev, struct msi_desc *desc) { if (platform_setup_msi_irq) return platform_setup_msi_irq(pdev, desc); - return ia64_setup_msi_irq(pdev, desc); + return __ia64_setup_msi_irq(pdev, desc); } -void arch_teardown_msi_irq(unsigned int irq) +static void ia64_teardown_msi_irq(struct msi_controller *ctrl, unsigned int irq) { if (platform_teardown_msi_irq) return platform_teardown_msi_irq(irq); - return ia64_teardown_msi_irq(irq); + return __ia64_teardown_msi_irq(irq); } +struct msi_controller ia64_msi_ctrl = { + .setup_irq = ia64_setup_msi_irq, + .teardown_irq = ia64_teardown_msi_irq, +}; + #ifdef CONFIG_INTEL_IOMMU #ifdef CONFIG_SMP static int dmar_msi_set_affinity(struct irq_data *data, diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c index 291a582..875f46a 100644 --- a/arch/ia64/pci/pci.c +++ b/arch/ia64/pci/pci.c @@ -437,6 +437,7 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) controller-companion = device; controller-node = acpi_get_node(device-handle); + controller-msi_ctrl = ia64_msi_ctrl; info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) { -- 1.7.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 07/16] MIPS/Octeon/MSI: Use MSI controller framework to configure MSI/MSI-X irq
Use MSI controller framework instead of arch MSI functions to configure MSI/MSI-X irq. So we can manage MSI/MSI-X irq in a unified framework. Signed-off-by: Yijing Wang wangyij...@huawei.com --- arch/mips/include/asm/octeon/pci-octeon.h |4 +++ arch/mips/pci/msi-octeon.c| 31 arch/mips/pci/pci-octeon.c|3 ++ 3 files changed, 25 insertions(+), 13 deletions(-) diff --git a/arch/mips/include/asm/octeon/pci-octeon.h b/arch/mips/include/asm/octeon/pci-octeon.h index 64ba56a..61c038d 100644 --- a/arch/mips/include/asm/octeon/pci-octeon.h +++ b/arch/mips/include/asm/octeon/pci-octeon.h @@ -66,4 +66,8 @@ enum octeon_dma_bar_type { */ extern enum octeon_dma_bar_type octeon_dma_bar_type; +#ifdef CONFIG_PCI_MSI +extern struct msi_controller octeon_msi_ctrl; +#endif + #endif diff --git a/arch/mips/pci/msi-octeon.c b/arch/mips/pci/msi-octeon.c index 63bbe07..30976da 100644 --- a/arch/mips/pci/msi-octeon.c +++ b/arch/mips/pci/msi-octeon.c @@ -57,7 +57,7 @@ static int msi_irq_size; * * Returns 0 on success. 
*/ -int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) +static int octeon_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) { struct msi_msg msg; u16 control; @@ -132,12 +132,12 @@ msi_irq_allocated: /* Make sure the search for available interrupts didn't fail */ if (irq = 64) { if (request_private_bits) { - pr_err(arch_setup_msi_irq: Unable to find %d free interrupts, trying just one, - 1 request_private_bits); + pr_err(%s: Unable to find %d free interrupts, trying just one, + __func__, 1 request_private_bits); request_private_bits = 0; goto try_only_one; } else - panic(arch_setup_msi_irq: Unable to find a free MSI interrupt); + panic(%s: Unable to find a free MSI interrupt, __func__); } /* MSI interrupts start at logical IRQ OCTEON_IRQ_MSI_BIT0 */ @@ -168,7 +168,7 @@ msi_irq_allocated: msg.address_hi = (0 + CVMX_SLI_PCIE_MSI_RCV) 32; break; default: - panic(arch_setup_msi_irq: Invalid octeon_dma_bar_type); + panic(%s: Invalid octeon_dma_bar_type, __func__); } msg.data = irq - OCTEON_IRQ_MSI_BIT0; @@ -182,7 +182,8 @@ msi_irq_allocated: return 0; } -int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +static int octeon_setup_msi_irqs(struct msi_controller *ctrl, struct pci_dev *dev, + int nvec, int type) { struct msi_desc *entry; int ret; @@ -201,7 +202,7 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) return 1; list_for_each_entry(entry, dev-msi_list, list) { - ret = arch_setup_msi_irq(dev, entry); + ret = octeon_setup_msi_irq(dev, entry); if (ret 0) return ret; if (ret 0) @@ -210,14 +211,13 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) return 0; } - /** * Called when a device no longer needs its MSI interrupts. All * MSI interrupts for the device are freed. * * @irq:The devices first irq number. There may be multple in sequence. 
*/ -void arch_teardown_msi_irq(unsigned int irq) +static void octeon_teardown_msi_irq(struct msi_controller *ctrl, unsigned int irq) { int number_irqs; u64 bitmask; @@ -226,8 +226,8 @@ void arch_teardown_msi_irq(unsigned int irq) if ((irq OCTEON_IRQ_MSI_BIT0) || (irq msi_irq_size + OCTEON_IRQ_MSI_BIT0)) - panic(arch_teardown_msi_irq: Attempted to teardown illegal - MSI interrupt (%d), irq); + panic(%s: Attempted to teardown illegal + MSI interrupt (%d), __func__, irq); irq -= OCTEON_IRQ_MSI_BIT0; index = irq / 64; @@ -249,8 +249,8 @@ void arch_teardown_msi_irq(unsigned int irq) /* Shift the mask to the correct bit location */ bitmask = irq0; if ((msi_free_irq_bitmask[index] bitmask) != bitmask) - panic(arch_teardown_msi_irq: Attempted to teardown MSI - interrupt (%d) not in use, irq); + panic(%s: Attempted to teardown MSI + interrupt (%d) not in use, __func__, irq); /* Checks are done, update the in use bitmask */ spin_lock(msi_free_irq_bitmask_lock); @@ -259,6 +259,11 @@ void arch_teardown_msi_irq(unsigned int irq) spin_unlock(msi_free_irq_bitmask_lock); } +struct msi_controller octeon_msi_ctrl = { + .setup_irqs = octeon_setup_msi_irqs, + .teardown_irq = octeon_teardown_msi_irq, +}; + static
[PATCH 10/16] Powerpc/MSI: Use MSI controller framework to configure MSI/MSI-X irq
Use MSI controller framework instead of arch MSI functions to configure MSI/MSI-X irq. So we can manage MSI/MSI-X irq in a unified framework. Signed-off-by: Yijing Wang wangyij...@huawei.com Acked-by: Michael Ellerman m...@ellerman.id.au --- arch/powerpc/include/asm/pci-bridge.h |8 arch/powerpc/kernel/msi.c | 19 +-- arch/powerpc/kernel/pci-common.c |3 +++ 3 files changed, 28 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 4ca90a3..f7d09d0 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -32,6 +32,10 @@ struct pci_controller { int self_busno; struct resource busn; +#ifdef CONFIG_PCI_MSI + struct msi_controller *msi_ctrl; +#endif + void __iomem *io_base_virt; #ifdef CONFIG_PPC64 void *io_base_alloc; @@ -94,6 +98,10 @@ struct pci_controller { void *private_data; }; +#ifdef CONFIG_PCI_MSI +extern struct msi_controller ppc_msi_ctrl; +#endif + /* These are used for config access before all the PCI probing has been done. 
*/ extern int early_read_config_byte(struct pci_controller *hose, int bus, diff --git a/arch/powerpc/kernel/msi.c b/arch/powerpc/kernel/msi.c index 71bd161..64a16f3 100644 --- a/arch/powerpc/kernel/msi.c +++ b/arch/powerpc/kernel/msi.c @@ -13,7 +13,15 @@ #include asm/machdep.h -int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +struct msi_controller *pcibios_msi_controller(struct pci_bus *bus) +{ + struct pci_controller *hose = bus-sysdata; + + return hose-msi_ctrl; +} + +static int ppc_setup_msi_irqs(struct msi_controller *ctrl, + struct pci_dev *dev, int nvec, int type) { if (!ppc_md.setup_msi_irqs || !ppc_md.teardown_msi_irqs) { pr_debug(msi: Platform doesn't provide MSI callbacks.\n); @@ -27,7 +35,13 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) return ppc_md.setup_msi_irqs(dev, nvec, type); } -void arch_teardown_msi_irqs(struct pci_dev *dev) +static void ppc_teardown_msi_irqs(struct msi_controller *ctrl, + struct pci_dev *dev) { ppc_md.teardown_msi_irqs(dev); } + +struct msi_controller ppc_msi_ctrl = { + .setup_irqs = ppc_setup_msi_irqs, + .teardown_irqs = ppc_teardown_msi_irqs, +}; diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index e5dad9a..c3f28c5 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1597,6 +1597,9 @@ void pcibios_scan_phb(struct pci_controller *hose) /* Wire up PHB bus resources */ pcibios_setup_phb_resources(hose, resources); +#ifdef CONFIG_PCI_MSI + hose-msi_ctrl = ppc_msi_ctrl; +#endif hose-busn.start = hose-first_busno; hose-busn.end = hose-last_busno; hose-busn.flags = IORESOURCE_BUS; -- 1.7.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 16/16] PCI/MSI: Clean up unused MSI arch functions
Now we use MSI controller in all platforms to configure MSI/MSI-X. We can clean up the unused arch functions. Signed-off-by: Yijing Wang wangyij...@huawei.com Reviewed-by: Lucas Stach l.st...@pengutronix.de --- drivers/pci/msi.c | 90 ++ include/linux/msi.h | 11 -- 2 files changed, 33 insertions(+), 68 deletions(-) diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 0e1da3e..cdb4634 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -27,7 +27,6 @@ int pci_msi_ignore_mask; #define msix_table_size(flags) ((flags PCI_MSIX_FLAGS_QSIZE) + 1) - /* Arch hooks */ struct msi_controller * __weak pcibios_msi_controller(struct pci_bus *bus) @@ -35,56 +34,31 @@ struct msi_controller * __weak pcibios_msi_controller(struct pci_bus *bus) return NULL; } -struct msi_controller *pci_msi_controller(struct pci_bus *bus) -{ - return pcibios_msi_controller(bus); -} - -int __weak arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) -{ - struct msi_controller *ctrl = pci_msi_controller(dev-bus); - int err; - - if (!ctrl || !ctrl-setup_irq) - return -EINVAL; - - err = ctrl-setup_irq(ctrl, dev, desc); - if (err 0) - return err; - - return 0; -} - -void __weak arch_teardown_msi_irq(unsigned int irq) -{ - struct msi_desc *entry = irq_get_msi_desc(irq); - struct msi_controller *ctrl = pci_msi_controller(entry-dev-bus); - - if (!ctrl || !ctrl-teardown_irq) - return; - - ctrl-teardown_irq(ctrl, irq); -} - -int __weak arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +int setup_msi_irqs(struct pci_dev *dev, int nvec, int type) { struct msi_desc *entry; int ret; struct msi_controller *ctrl; - ctrl = pci_msi_controller(dev-bus); - if (ctrl ctrl-setup_irqs) + ctrl = pcibios_msi_controller(dev-bus); + if (!ctrl) + return -EINVAL; + + if (ctrl-setup_irqs) return ctrl-setup_irqs(ctrl, dev, nvec, type); /* * If an architecture wants to support multiple MSI, it needs to -* override arch_setup_msi_irqs() +* implement ctrl-setup_irqs(). 
*/ if (type == PCI_CAP_ID_MSI nvec 1) return 1; + if (!ctrl-setup_irq) + return -EINVAL; + list_for_each_entry(entry, dev-msi_list, list) { - ret = arch_setup_msi_irq(dev, entry); + ret = ctrl-setup_irq(ctrl, dev, entry); if (ret 0) return ret; if (ret 0) @@ -101,6 +75,10 @@ int __weak arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) void default_teardown_msi_irqs(struct pci_dev *dev) { struct msi_desc *entry; + struct msi_controller *ctrl = pcibios_msi_controller(dev-bus); + + if (!ctrl-teardown_irq) + return; list_for_each_entry(entry, dev-msi_list, list) { int i, nvec; @@ -111,18 +89,18 @@ void default_teardown_msi_irqs(struct pci_dev *dev) else nvec = 1 entry-msi_attrib.multiple; for (i = 0; i nvec; i++) - arch_teardown_msi_irq(entry-irq + i); + ctrl-teardown_irq(ctrl, entry-irq + i); } } -void __weak arch_teardown_msi_irqs(struct pci_dev *dev) +static void teardown_msi_irqs(struct pci_dev *dev) { - struct msi_controller *ctrl = pci_msi_controller(dev-bus); + struct msi_controller *ctrl = pcibios_msi_controller(dev-bus); if (ctrl ctrl-teardown_irqs) return ctrl-teardown_irqs(ctrl, dev); - return default_teardown_msi_irqs(dev); + default_teardown_msi_irqs(dev); } static void default_restore_msi_irq(struct pci_dev *dev, int irq) @@ -143,10 +121,17 @@ static void default_restore_msi_irq(struct pci_dev *dev, int irq) __write_msi_msg(entry, entry-msg); } -void __weak arch_restore_msi_irqs(struct pci_dev *dev) +void default_restore_msi_irqs(struct pci_dev *dev) { - struct msi_controller *ctrl = pci_msi_controller(dev-bus); + struct msi_desc *entry; + list_for_each_entry(entry, dev-msi_list, list) + default_restore_msi_irq(dev, entry-irq); +} + +static void restore_msi_irqs(struct pci_dev *dev) +{ + struct msi_controller *ctrl = pcibios_msi_controller(dev-bus); if (ctrl ctrl-restore_irqs) return ctrl-restore_irqs(ctrl, dev); @@ -259,15 +244,6 @@ void unmask_msi_irq(struct irq_data *data) msi_set_mask_bit(data, 0); } -void default_restore_msi_irqs(struct 
pci_dev *dev) -{ - struct msi_desc *entry; - - list_for_each_entry(entry, dev-msi_list, list) { - default_restore_msi_irq(dev, entry-irq); - } -} - void __read_msi_msg(struct msi_desc *entry, struct msi_msg
[PATCH 00/16] Use MSI controller framework to configure MSI/MSI-X
This series is based on [PATCH 00/10] Save MSI chip in pci_sys_data, https://lkml.org/lkml/2014/10/27/85. This series is the v4 of Use MSI chip framework to configure MSI/MSI-X in all platforms. I split it out and post it together. v3-new: Some trivial changes in IA64/MSI: Use MSI controller framework to configure MSI/MSI-X irq. Old history: v2-v3: 1. For patch x86/xen/MSI: Eliminate..., introduce a new global flag pci_msi_ignore_mask to control the msi mask instead of replacing the irqchip-mask with nop function, the latter method has problem pointed out by Konrad Rzeszutek Wilk. 2. Save msi chip in arch pci sysdata instead of associating msi chip to pci bus. Because pci devices under same host share the same msi chip, so I think associate msi chip to pci host/pci sysdata is better than to bother every pci bus/devices. A better solution suggested by Liviu is to rip out pci_host_bridge from pci_create_root_bus(), then we can save some pci host common attributes like domain_nr, msi_chip, resources, into the generic pci_host_bridge. Because this changes to pci host bridge is also a large series, so I think we should go step by step, I will try to post it in another series later. 4. Clean up arm pcibios_add_bus() and pcibios_remove_bus() which were used to associate msi chip to pci bus. v1-v2: Add a patch to make s390 MSI code build happy between patch x86/xen/MSI: E.. and s390/MSI: Use MSI... Fix several typo problems found by Lucas. RFC-v1: Updated [patch 4/21] x86/xen/MSI: Eliminate..., export msi_chip instead of #ifdef to fix MSI bug in xen running in x86. Rename arch_get_match_msi_chip() to arch_find_msi_chip(). Drop use struct device as the msi_chip argument, we will do that later in another patchset. 
Yijing Wang (16): PCI/MSI: Refactor MSI controller to make it become more common x86/MSI: Use MSI controller framework to configure MSI/MSI-X irq x86/xen/MSI: Use MSI controller framework to configure MSI/MSI-X irq Irq_remapping/MSI: Use MSI controller framework to configure MSI/MSI-X irq x86/MSI: Remove unused MSI weak arch functions Mips/MSI: Save MSI controller in pci sysdata MIPS/Octeon/MSI: Use MSI controller framework to configure MSI/MSI-X irq MIPS/Xlp/MSI: Use MSI controller framework to configure MSI/MSI-X irq MIPS/Xlr/MSI: Use MSI controller framework to configure MSI/MSI-X irq Powerpc/MSI: Use MSI controller framework to configure MSI/MSI-X irq s390/MSI: Use MSI controller framework to configure MSI/MSI-X irq arm/iop13xx/MSI: Use MSI controller framework to configure MSI/MSI-X irq IA64/MSI: Use MSI controller framework to configure MSI/MSI-X irq Sparc/MSI: Use MSI controller framework to configure MSI/MSI-X irq tile/MSI: Use MSI controller framework to configure MSI/MSI-X irq PCI/MSI: Clean up unused MSI arch functions arch/arm/mach-iop13xx/include/mach/pci.h|4 + arch/arm/mach-iop13xx/iq81340mc.c |3 + arch/arm/mach-iop13xx/iq81340sc.c |5 +- arch/arm/mach-iop13xx/msi.c | 11 ++- arch/ia64/include/asm/pci.h |3 +- arch/ia64/kernel/msi_ia64.c | 24 -- arch/ia64/pci/pci.c |1 + arch/mips/include/asm/netlogic/xlp-hal/pcibus.h |1 + arch/mips/include/asm/octeon/pci-octeon.h |4 + arch/mips/include/asm/pci.h |3 + arch/mips/pci/msi-octeon.c | 31 --- arch/mips/pci/msi-xlp.c | 11 ++- arch/mips/pci/pci-octeon.c |3 + arch/mips/pci/pci-xlp.c |3 + arch/mips/pci/pci-xlr.c | 17 - arch/mips/pci/pci.c |9 ++ arch/powerpc/include/asm/pci-bridge.h |8 ++ arch/powerpc/kernel/msi.c | 19 - arch/powerpc/kernel/pci-common.c|3 + arch/s390/include/asm/pci.h |1 + arch/s390/pci/pci.c | 19 - arch/sparc/kernel/pci.c | 20 - arch/sparc/kernel/pci_impl.h|3 + arch/tile/include/asm/pci.h |2 + arch/tile/kernel/pci_gx.c | 18 - arch/x86/include/asm/pci.h |9 +- arch/x86/include/asm/x86_init.h |4 - 
arch/x86/kernel/apic/io_apic.c | 18 - arch/x86/kernel/x86_init.c | 24 -- arch/x86/pci/acpi.c |1 + arch/x86/pci/common.c |3 + arch/x86/pci/xen.c | 45 ++ drivers/iommu/irq_remapping.c | 11 ++- drivers/pci/msi.c | 97 ++ include/linux/msi.h | 19 ++--- 35 files changed, 301 insertions(+),
[PATCH 04/16] Irq_remapping/MSI: Use MSI controller framework to configure MSI/MSI-X irq
Use MSI controller framework instead of arch MSI functions to configure MSI/MSI-X irq. So we can manage MSI/MSI-X irq in a unified framework. Signed-off-by: Yijing Wang wangyij...@huawei.com --- drivers/iommu/irq_remapping.c | 11 --- 1 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index 74a1767..6db1459 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -140,8 +140,8 @@ error: return ret; } -static int irq_remapping_setup_msi_irqs(struct pci_dev *dev, - int nvec, int type) +static int irq_remapping_setup_msi_irqs(struct msi_controller *ctrl, + struct pci_dev *dev, int nvec, int type) { if (type == PCI_CAP_ID_MSI) return do_setup_msi_irqs(dev, nvec); @@ -149,6 +149,11 @@ static int irq_remapping_setup_msi_irqs(struct pci_dev *dev, return do_setup_msix_irqs(dev, nvec); } +static struct msi_controller remap_msi_ctrl = { + .setup_irqs = irq_remapping_setup_msi_irqs, + .teardown_irq = native_teardown_msi_irq, +}; + static void eoi_ioapic_pin_remapped(int apic, int pin, int vector) { /* @@ -166,9 +171,9 @@ static void __init irq_remapping_modify_x86_ops(void) x86_io_apic_ops.set_affinity= set_remapped_irq_affinity; x86_io_apic_ops.setup_entry = setup_ioapic_remapped_entry; x86_io_apic_ops.eoi_ioapic_pin = eoi_ioapic_pin_remapped; - x86_msi.setup_msi_irqs = irq_remapping_setup_msi_irqs; x86_msi.setup_hpet_msi = setup_hpet_msi_remapped; x86_msi.compose_msi_msg = compose_remapped_msi_msg; + x86_msi_ctrl = &remap_msi_ctrl; } static __init int setup_nointremap(char *str) -- 1.7.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 06/16] Mips/MSI: Save MSI controller in pci sysdata
Save MSI controller in pci sysdata, add arch pcibios_msi_controller() to extract out MSI controller. Signed-off-by: Yijing Wang wangyij...@huawei.com --- arch/mips/include/asm/pci.h |3 +++ arch/mips/pci/pci.c |9 + 2 files changed, 12 insertions(+), 0 deletions(-) diff --git a/arch/mips/include/asm/pci.h b/arch/mips/include/asm/pci.h index 974b0e3..d7cd850 100644 --- a/arch/mips/include/asm/pci.h +++ b/arch/mips/include/asm/pci.h @@ -43,6 +43,9 @@ struct pci_controller { int iommu; +#ifdef CONFIG_PCI_MSI + struct msi_controller *msi_ctrl; +#endif /* Optional access methods for reading/writing the bus number of the PCI controller */ int (*get_busno)(void); diff --git a/arch/mips/pci/pci.c b/arch/mips/pci/pci.c index 1bf60b1..7917cba 100644 --- a/arch/mips/pci/pci.c +++ b/arch/mips/pci/pci.c @@ -36,6 +36,15 @@ unsigned long PCIBIOS_MIN_MEM; static int pci_initialized; +#ifdef CONFIG_PCI_MSI +struct msi_controller *pcibios_msi_controller(struct pci_bus *bus) +{ + struct pci_controller *ctrl = bus->sysdata; + + return ctrl->msi_ctrl; +} +#endif + /* * We need to avoid collisions with `mirrored' VGA ports * and other strange ISA hardware, so we always want the -- 1.7.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 09/16] MIPS/Xlr/MSI: Use MSI controller framework to configure MSI/MSI-X irq
Use MSI controller framework instead of arch MSI functions to configure MSI/MSI-X irq. So we can manage MSI/MSI-X irq in a unified framework. Signed-off-by: Yijing Wang wangyij...@huawei.com --- arch/mips/pci/pci-xlr.c | 17 +++-- 1 files changed, 15 insertions(+), 2 deletions(-) diff --git a/arch/mips/pci/pci-xlr.c b/arch/mips/pci/pci-xlr.c index 0dde803..1e43c70 100644 --- a/arch/mips/pci/pci-xlr.c +++ b/arch/mips/pci/pci-xlr.c @@ -149,6 +149,8 @@ static struct resource nlm_pci_io_resource = { .flags = IORESOURCE_IO, }; +static struct msi_controller xlr_msi_ctrl; + struct pci_controller nlm_pci_controller = { .index = 0, .pci_ops= nlm_pci_ops, @@ -156,6 +158,9 @@ struct pci_controller nlm_pci_controller = { .mem_offset = 0xUL, .io_resource= nlm_pci_io_resource, .io_offset = 0xUL, +#ifdef CONFIG_PCI_MSI + .msi_ctrl = xlr_msi_ctrl, +#endif }; /* @@ -214,11 +219,13 @@ static int get_irq_vector(const struct pci_dev *dev) } #ifdef CONFIG_PCI_MSI -void arch_teardown_msi_irq(unsigned int irq) +static void xlr_teardown_msi_irq(struct msi_controller *ctrl, + unsigned int irq) { } -int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) +static int xlr_setup_msi_irq(struct msi_controller *ctrl, + struct pci_dev *dev, struct msi_desc *desc) { struct msi_msg msg; struct pci_dev *lnk; @@ -263,6 +270,12 @@ int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) write_msi_msg(irq, msg); return 0; } + +static struct msi_controller xlr_msi_ctrl = { + .setup_irq = xlr_setup_msi_irq, + .teardown_irq = xlr_teardown_msi_irq, +}; + #endif /* Extra ACK needed for XLR on chip PCI controller */ -- 1.7.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 08/16] MIPS/Xlp/MSI: Use MSI controller framework to configure MSI/MSI-X irq
Use MSI controller framework instead of arch MSI functions to configure MSI/MSI-X IRQ. So we can manage MSI/MSI-X irq in a unified framework. Signed-off-by: Yijing Wang wangyij...@huawei.com --- arch/mips/include/asm/netlogic/xlp-hal/pcibus.h |1 + arch/mips/pci/msi-xlp.c | 11 +-- arch/mips/pci/pci-xlp.c |3 +++ 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/arch/mips/include/asm/netlogic/xlp-hal/pcibus.h b/arch/mips/include/asm/netlogic/xlp-hal/pcibus.h index 91540f4..8e6869a 100644 --- a/arch/mips/include/asm/netlogic/xlp-hal/pcibus.h +++ b/arch/mips/include/asm/netlogic/xlp-hal/pcibus.h @@ -103,6 +103,7 @@ #ifdef CONFIG_PCI_MSI void xlp_init_node_msi_irqs(int node, int link); +extern struct msi_controller xlp_msi_ctrl; #else static inline void xlp_init_node_msi_irqs(int node, int link) {} #endif diff --git a/arch/mips/pci/msi-xlp.c b/arch/mips/pci/msi-xlp.c index fa374fe..d18a162 100644 --- a/arch/mips/pci/msi-xlp.c +++ b/arch/mips/pci/msi-xlp.c @@ -245,7 +245,8 @@ static struct irq_chip xlp_msix_chip = { .irq_unmask = unmask_msi_irq, }; -void arch_teardown_msi_irq(unsigned int irq) +static void xlp_teardown_msi_irq(struct msi_controller *ctrl, + unsigned int irq) { } @@ -452,7 +453,8 @@ static int xlp_setup_msix(uint64_t lnkbase, int node, int link, return 0; } -int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) +static int xlp_setup_msi_irq(struct msi_controller *ctrl, + struct pci_dev *dev, struct msi_desc *desc) { struct pci_dev *lnkdev; uint64_t lnkbase; @@ -474,6 +476,11 @@ int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) return xlp_setup_msi(lnkbase, node, link, desc); } +struct msi_controller xlp_msi_ctrl = { + .setup_irq = xlp_setup_msi_irq, + .teardown_irq = xlp_teardown_msi_irq, +}; + void __init xlp_init_node_msi_irqs(int node, int link) { struct nlm_soc_info *nodep; diff --git a/arch/mips/pci/pci-xlp.c b/arch/mips/pci/pci-xlp.c index 7babf01..ab80417 100644 --- a/arch/mips/pci/pci-xlp.c +++ 
b/arch/mips/pci/pci-xlp.c @@ -174,6 +174,9 @@ struct pci_controller nlm_pci_controller = { .mem_offset = 0xUL, .io_resource= nlm_pci_io_resource, .io_offset = 0xUL, +#ifdef CONFIG_PCI_MSI + .msi_ctrl = xlp_msi_ctrl, +#endif }; struct pci_dev *xlp_get_pcie_link(const struct pci_dev *dev) -- 1.7.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 14/16] Sparc/MSI: Use MSI controller framework to configure MSI/MSI-X irq
Use MSI controller framework instead of arch MSI functions to configure MSI/MSI-X irq. So we can manage MSI/MSI-X irq in a unified framework. Signed-off-by: Yijing Wang wangyij...@huawei.com Acked-by: David S. Miller da...@davemloft.net --- arch/sparc/kernel/pci.c | 20 ++-- arch/sparc/kernel/pci_impl.h |3 +++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c index b36365f..c691a10 100644 --- a/arch/sparc/kernel/pci.c +++ b/arch/sparc/kernel/pci.c @@ -656,6 +656,9 @@ struct pci_bus *pci_scan_one_pbm(struct pci_pbm_info *pbm, printk(PCI: Scanning PBM %s\n, node-full_name); +#ifdef CONFIG_PCI_MSI + pbm-msi_ctrl = sparc_msi_ctrl; +#endif pci_add_resource_offset(resources, pbm-io_space, pbm-io_space.start); pci_add_resource_offset(resources, pbm-mem_space, @@ -905,7 +908,15 @@ int pci_domain_nr(struct pci_bus *pbus) EXPORT_SYMBOL(pci_domain_nr); #ifdef CONFIG_PCI_MSI -int arch_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc) +struct msi_controller *pcibios_msi_controller(struct pci_bus *bus) +{ + struct pci_pbm_info *pbm = bus-sysdata; + + return pbm-msi_ctrl; +} + +static int sparc_setup_msi_irq(struct msi_controller *ctrl, + struct pci_dev *pdev, struct msi_desc *desc) { struct pci_pbm_info *pbm = pdev-dev.archdata.host_controller; unsigned int irq; @@ -916,7 +927,7 @@ int arch_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc) return pbm-setup_msi_irq(irq, pdev, desc); } -void arch_teardown_msi_irq(unsigned int irq) +static void sparc_teardown_msi_irq(struct msi_controller *ctrl, unsigned int irq) { struct msi_desc *entry = irq_get_msi_desc(irq); struct pci_dev *pdev = entry-dev; @@ -925,6 +936,11 @@ void arch_teardown_msi_irq(unsigned int irq) if (pbm-teardown_msi_irq) pbm-teardown_msi_irq(irq, pdev); } + +struct msi_controller sparc_msi_ctrl = { + .setup_irq = sparc_setup_msi_irq, + .teardown_irq = sparc_teardown_msi_irq, +}; #endif /* !(CONFIG_PCI_MSI) */ static void 
ali_sound_dma_hack(struct pci_dev *pdev, int set_bit) diff --git a/arch/sparc/kernel/pci_impl.h b/arch/sparc/kernel/pci_impl.h index 75803c7..e02642d 100644 --- a/arch/sparc/kernel/pci_impl.h +++ b/arch/sparc/kernel/pci_impl.h @@ -55,6 +55,8 @@ struct sparc64_msiq_cookie { struct pci_pbm_info *pbm; unsigned long msiqid; }; + +extern struct msi_controller sparc_msi_ctrl; #endif struct pci_pbm_info { @@ -132,6 +134,7 @@ struct pci_pbm_info { void*msi_queues; unsigned long *msi_bitmap; unsigned int*msi_irq_table; + struct msi_controller *msi_ctrl; int (*setup_msi_irq)(unsigned int *irq_p, struct pci_dev *pdev, struct msi_desc *entry); void (*teardown_msi_irq)(unsigned int irq, struct pci_dev *pdev); -- 1.7.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 05/16] x86/MSI: Remove unused MSI weak arch functions
Now we can clean up MSI weak arch functions in x86. Signed-off-by: Yijing Wang wangyij...@huawei.com --- arch/x86/include/asm/pci.h |5 + arch/x86/include/asm/x86_init.h |4 arch/x86/kernel/apic/io_apic.c | 21 + arch/x86/kernel/x86_init.c | 24 4 files changed, 6 insertions(+), 48 deletions(-) diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index 1af3d77..21fe24f 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -99,14 +99,11 @@ extern void pci_iommu_alloc(void); #ifdef CONFIG_PCI_MSI /* implemented in arch/x86/kernel/apic/io_apic. */ struct msi_desc; -int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); -void native_teardown_msi_irq(unsigned int irq); -void native_restore_msi_irqs(struct pci_dev *dev); +void native_teardown_msi_irq(struct msi_controller *ctrl, unsigned int irq); int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, unsigned int irq_base, unsigned int irq_offset); extern struct msi_controller *x86_msi_ctrl; #else -#define native_setup_msi_irqs NULL #define native_teardown_msi_irqNULL #endif diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index f58a9c7..2514f67 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -174,13 +174,9 @@ struct pci_dev; struct msi_msg; struct x86_msi_ops { - int (*setup_msi_irqs)(struct pci_dev *dev, int nvec, int type); void (*compose_msi_msg)(struct pci_dev *dev, unsigned int irq, unsigned int dest, struct msi_msg *msg, u8 hpet_id); - void (*teardown_msi_irq)(unsigned int irq); - void (*teardown_msi_irqs)(struct pci_dev *dev); - void (*restore_msi_irqs)(struct pci_dev *dev); int (*setup_hpet_msi)(unsigned int irq, unsigned int id); }; diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 8b8c671..04bf011 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3207,7 +3207,8 @@ int setup_msi_irq(struct pci_dev *dev, struct msi_desc 
*msidesc, return 0; } -int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +static int native_setup_msi_irqs(struct msi_controller *ctrl, + struct pci_dev *dev, int nvec, int type) { struct msi_desc *msidesc; unsigned int irq; @@ -3234,26 +3235,14 @@ int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) return 0; } -static int __native_setup_msi_irqs(struct msi_controller *ctrl, - struct pci_dev *dev, int nvec, int type) -{ - return native_setup_msi_irqs(dev, nvec, type); -} - -void native_teardown_msi_irq(unsigned int irq) +void native_teardown_msi_irq(struct msi_controller *ctrl, unsigned int irq) { irq_free_hwirq(irq); } -static void __native_teardown_msi_irq(struct msi_controller *ctrl, - unsigned int irq) -{ - native_teardown_msi_irq(irq); -} - static struct msi_controller native_msi_ctrl = { - .setup_irqs = __native_setup_msi_irqs, - .teardown_irq = __native_teardown_msi_irq, + .setup_irqs = native_setup_msi_irqs, + .teardown_irq = native_teardown_msi_irq, }; struct msi_controller *pcibios_msi_controller(struct pci_bus *bus) diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 234b072..cc32568 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -110,34 +110,10 @@ EXPORT_SYMBOL_GPL(x86_platform); #if defined(CONFIG_PCI_MSI) struct x86_msi_ops x86_msi = { - .setup_msi_irqs = native_setup_msi_irqs, .compose_msi_msg= native_compose_msi_msg, - .teardown_msi_irq = native_teardown_msi_irq, - .teardown_msi_irqs = default_teardown_msi_irqs, - .restore_msi_irqs = default_restore_msi_irqs, .setup_hpet_msi = default_setup_hpet_msi, }; -/* MSI arch specific hooks */ -int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) -{ - return x86_msi.setup_msi_irqs(dev, nvec, type); -} - -void arch_teardown_msi_irqs(struct pci_dev *dev) -{ - x86_msi.teardown_msi_irqs(dev); -} - -void arch_teardown_msi_irq(unsigned int irq) -{ - x86_msi.teardown_msi_irq(irq); -} - -void arch_restore_msi_irqs(struct 
pci_dev *dev) -{ - x86_msi.restore_msi_irqs(dev); -} #endif struct x86_io_apic_ops x86_io_apic_ops = { -- 1.7.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 15/16] tile/MSI: Use MSI controller framework to configure MSI/MSI-X irq
Use MSI controller framework instead of arch MSI functions to configure MSI/MSI-X irq. So we can manage MSI/MSI-X irq in a unified framework. Signed-off-by: Yijing Wang wangyij...@huawei.com --- arch/tile/include/asm/pci.h |2 ++ arch/tile/kernel/pci_gx.c | 18 -- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/arch/tile/include/asm/pci.h b/arch/tile/include/asm/pci.h index dfedd7a..3ebd66b 100644 --- a/arch/tile/include/asm/pci.h +++ b/arch/tile/include/asm/pci.h @@ -152,6 +152,7 @@ struct pci_controller { int pio_io_index; /* PIO region index for I/O space access */ #endif + struct msi_controller *msi_ctrl; /* * Mem-Map regions for all the memory controllers so that Linux can * map all of its physical memory space to the PCI bus. @@ -179,6 +180,7 @@ struct pci_controller { int irq_intx_table[4]; }; +extern struct msi_controller tilegx_msi_ctrl; extern struct pci_controller pci_controllers[TILEGX_NUM_TRIO * TILEGX_TRIO_PCIES]; extern gxio_trio_context_t trio_contexts[TILEGX_NUM_TRIO]; extern int num_trio_shims; diff --git a/arch/tile/kernel/pci_gx.c b/arch/tile/kernel/pci_gx.c index e39f9c5..6bf5a24 100644 --- a/arch/tile/kernel/pci_gx.c +++ b/arch/tile/kernel/pci_gx.c @@ -887,6 +887,7 @@ int __init pcibios_init(void) controller-mem_offset); pci_add_resource(resources, controller-io_space); controller-first_busno = next_busno; + controller-msi_ctrl = tilegx_msi_ctrl; bus = pci_scan_root_bus(NULL, next_busno, controller-ops, controller, resources); controller-root_bus = bus; @@ -1485,7 +1486,15 @@ static struct irq_chip tilegx_msi_chip = { /* TBD: support set_affinity. 
*/ }; -int arch_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc) +struct msi_controller *pcibios_msi_controller(struct pci_bus *bus) +{ + struct pci_controller *controller = bus-sysdata; + + return controller-msi_ctrl; +} + +static int tile_setup_msi_irq(struct msi_controller *ctrl, + struct pci_dev *pdev, struct msi_desc *desc) { struct pci_controller *controller; gxio_trio_context_t *trio_context; @@ -1604,7 +1613,12 @@ is_64_failure: return ret; } -void arch_teardown_msi_irq(unsigned int irq) +static void tile_teardown_msi_irq(struct msi_controller *ctrl, unsigned int irq) { irq_free_hwirq(irq); } + +struct msi_controller tilegx_msi_ctrl = { + .setup_irq = tile_setup_msi_irq, + .teardown_irq = tile_teardown_msi_irq, +}; -- 1.7.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 12/16] arm/iop13xx/MSI: Use MSI controller framework to configure MSI/MSI-X irq
Use MSI controller framework instead of arch MSI functions to configure MSI/MSI-X irq. So we can manage MSI/MSI-X irq in a unified framework. Signed-off-by: Yijing Wang wangyij...@huawei.com --- arch/arm/mach-iop13xx/include/mach/pci.h |4 arch/arm/mach-iop13xx/iq81340mc.c|3 +++ arch/arm/mach-iop13xx/iq81340sc.c|5 - arch/arm/mach-iop13xx/msi.c | 11 +-- 4 files changed, 20 insertions(+), 3 deletions(-) diff --git a/arch/arm/mach-iop13xx/include/mach/pci.h b/arch/arm/mach-iop13xx/include/mach/pci.h index 59f42b5..c8f5caf 100644 --- a/arch/arm/mach-iop13xx/include/mach/pci.h +++ b/arch/arm/mach-iop13xx/include/mach/pci.h @@ -11,6 +11,10 @@ void iop13xx_atu_select(struct hw_pci *plat_pci); void iop13xx_pci_init(void); void iop13xx_map_pci_memory(void); +#ifdef CONFIG_PCI_MSI +extern struct msi_controller iop13xx_msi_ctrl; +#endif + #define IOP_PCI_STATUS_ERROR (PCI_STATUS_PARITY | \ PCI_STATUS_SIG_TARGET_ABORT | \ PCI_STATUS_REC_TARGET_ABORT | \ diff --git a/arch/arm/mach-iop13xx/iq81340mc.c b/arch/arm/mach-iop13xx/iq81340mc.c index 9cd07d3..7b802f5 100644 --- a/arch/arm/mach-iop13xx/iq81340mc.c +++ b/arch/arm/mach-iop13xx/iq81340mc.c @@ -59,6 +59,9 @@ static struct hw_pci iq81340mc_pci __initdata = { .map_irq= iq81340mc_pcix_map_irq, .scan = iop13xx_scan_bus, .preinit= iop13xx_pci_init, +#ifdef CONFIG_PCI_MSI + .msi_ctrl = iop13xx_msi_ctrl, +#endif }; static int __init iq81340mc_pci_init(void) diff --git a/arch/arm/mach-iop13xx/iq81340sc.c b/arch/arm/mach-iop13xx/iq81340sc.c index b3ec11c..934de2e 100644 --- a/arch/arm/mach-iop13xx/iq81340sc.c +++ b/arch/arm/mach-iop13xx/iq81340sc.c @@ -60,7 +60,10 @@ static struct hw_pci iq81340sc_pci __initdata = { .setup = iop13xx_pci_setup, .scan = iop13xx_scan_bus, .map_irq= iq81340sc_atux_map_irq, - .preinit= iop13xx_pci_init + .preinit= iop13xx_pci_init, +#ifdef CONFIG_PCI_MSI + .msi_ctrl = iop13xx_msi_ctrl, +#endif }; static int __init iq81340sc_pci_init(void) diff --git a/arch/arm/mach-iop13xx/msi.c 
b/arch/arm/mach-iop13xx/msi.c index e7730cf..07a512e 100644 --- a/arch/arm/mach-iop13xx/msi.c +++ b/arch/arm/mach-iop13xx/msi.c @@ -132,7 +132,8 @@ static struct irq_chip iop13xx_msi_chip = { .irq_unmask = unmask_msi_irq, }; -int arch_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc) +static int iop13xx_setup_msi_irq(struct msi_controller *ctrl, + struct pci_dev *dev, struct msi_desc *desc) { int id, irq = irq_alloc_desc_from(IRQ_IOP13XX_MSI_0, -1); struct msi_msg msg; @@ -159,7 +160,13 @@ int arch_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc) return 0; } -void arch_teardown_msi_irq(unsigned int irq) +static void iop13xx_teardown_msi_irq(struct msi_controller *ctrl, + unsigned int irq) { irq_free_desc(irq); } + +struct msi_controller iop13xx_msi_ctrl = { + .setup_irq = iop13xx_setup_msi_irq, + .teardown_irq = iop13xx_teardown_msi_irq, +}; -- 1.7.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 03/16] x86/xen/MSI: Use MSI controller framework to configure MSI/MSI-X irq
Use MSI controller framework instead of arch MSI functions to configure MSI/MSI-X irq. So we can manage MSI/MSI-X irq in a unified framework. Signed-off-by: Yijing Wang wangyij...@huawei.com CC: David Vrabel david.vra...@citrix.com CC: Konrad Rzeszutek Wilk konrad.w...@oracle.com --- arch/x86/pci/xen.c | 45 +++-- 1 files changed, 27 insertions(+), 18 deletions(-) diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index 466b978..83d8d50 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -157,7 +157,8 @@ static int acpi_register_gsi_xen(struct device *dev, u32 gsi, struct xen_pci_frontend_ops *xen_pci_frontend; EXPORT_SYMBOL_GPL(xen_pci_frontend); -static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +static int xen_setup_msi_irqs(struct msi_controller *ctrl, + struct pci_dev *dev, int nvec, int type) { int irq, ret, i; struct msi_desc *msidesc; @@ -219,7 +220,8 @@ static void xen_msi_compose_msg(struct pci_dev *pdev, unsigned int pirq, msg-data = XEN_PIRQ_MSI_DATA; } -static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +static int xen_hvm_setup_msi_irqs(struct msi_controller *ctrl, + struct pci_dev *dev, int nvec, int type) { int irq, pirq; struct msi_desc *msidesc; @@ -267,7 +269,8 @@ error: #ifdef CONFIG_XEN_DOM0 static bool __read_mostly pci_seg_supported = true; -static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +static int xen_initdom_setup_msi_irqs(struct msi_controller *ctrl, + struct pci_dev *dev, int nvec, int type) { int ret = 0; struct msi_desc *msidesc; @@ -349,7 +352,8 @@ out: return ret; } -static void xen_initdom_restore_msi_irqs(struct pci_dev *dev) +static void xen_initdom_restore_msi_irqs(struct msi_controller *ctrl, + struct pci_dev *dev) { int ret = 0; @@ -376,7 +380,13 @@ static void xen_initdom_restore_msi_irqs(struct pci_dev *dev) } #endif -static void xen_teardown_msi_irqs(struct pci_dev *dev) +static void xen_teardown_msi_irq(struct msi_controller *ctrl, 
unsigned int irq) +{ + xen_destroy_irq(irq); +} + +static void xen_teardown_msi_irqs(struct msi_controller *ctrl, + struct pci_dev *dev) { struct msi_desc *msidesc; @@ -390,11 +400,7 @@ static void xen_teardown_msi_irqs(struct pci_dev *dev) default_teardown_msi_irqs(dev); } -static void xen_teardown_msi_irq(unsigned int irq) -{ - xen_destroy_irq(irq); -} - +struct msi_controller xen_msi_ctrl; #endif int __init pci_xen_init(void) @@ -415,9 +421,10 @@ int __init pci_xen_init(void) #endif #ifdef CONFIG_PCI_MSI - x86_msi.setup_msi_irqs = xen_setup_msi_irqs; - x86_msi.teardown_msi_irq = xen_teardown_msi_irq; - x86_msi.teardown_msi_irqs = xen_teardown_msi_irqs; + xen_msi_ctrl.setup_irqs = xen_setup_msi_irqs; + xen_msi_ctrl.teardown_irq = xen_teardown_msi_irq; + xen_msi_ctrl.teardown_irqs = xen_teardown_msi_irqs; + x86_msi_ctrl = xen_msi_ctrl; pci_msi_ignore_mask = 1; #endif return 0; @@ -437,8 +444,9 @@ int __init pci_xen_hvm_init(void) #endif #ifdef CONFIG_PCI_MSI - x86_msi.setup_msi_irqs = xen_hvm_setup_msi_irqs; - x86_msi.teardown_msi_irq = xen_teardown_msi_irq; + xen_msi_ctrl.setup_irqs = xen_hvm_setup_msi_irqs; + xen_msi_ctrl.teardown_irq = xen_teardown_msi_irq; + x86_msi_ctrl = xen_msi_ctrl; #endif return 0; } @@ -495,9 +503,10 @@ int __init pci_xen_initial_domain(void) int irq; #ifdef CONFIG_PCI_MSI - x86_msi.setup_msi_irqs = xen_initdom_setup_msi_irqs; - x86_msi.teardown_msi_irq = xen_teardown_msi_irq; - x86_msi.restore_msi_irqs = xen_initdom_restore_msi_irqs; + xen_msi_ctrl.setup_irqs = xen_initdom_setup_msi_irqs; + xen_msi_ctrl.teardown_irq = xen_teardown_msi_irq; + xen_msi_ctrl.restore_irqs = xen_initdom_restore_msi_irqs; + x86_msi_ctrl = xen_msi_ctrl; pci_msi_ignore_mask = 1; #endif xen_setup_acpi_sci(); -- 1.7.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 02/16] x86/MSI: Use MSI controller framework to configure MSI/MSI-X irq
Use MSI controller framework instead of arch MSI functions to configure MSI/MSI-X irq. So we can manage MSI/MSI-X irq in a unified framework. Signed-off-by: Yijing Wang wangyij...@huawei.com --- arch/x86/include/asm/pci.h |4 arch/x86/kernel/apic/io_apic.c | 25 + arch/x86/pci/acpi.c|1 + arch/x86/pci/common.c |3 +++ 4 files changed, 33 insertions(+), 0 deletions(-) diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index 0892ea0..1af3d77 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -20,6 +20,9 @@ struct pci_sysdata { #ifdef CONFIG_X86_64 void*iommu; /* IOMMU private data */ #endif +#ifdef CONFIG_PCI_MSI + struct msi_controller *msi_ctrl; +#endif }; extern int pci_routeirq; @@ -101,6 +104,7 @@ void native_teardown_msi_irq(unsigned int irq); void native_restore_msi_irqs(struct pci_dev *dev); int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, unsigned int irq_base, unsigned int irq_offset); +extern struct msi_controller *x86_msi_ctrl; #else #define native_setup_msi_irqs NULL #define native_teardown_msi_irqNULL diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 1183d54..8b8c671 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3234,11 +3234,36 @@ int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) return 0; } +static int __native_setup_msi_irqs(struct msi_controller *ctrl, + struct pci_dev *dev, int nvec, int type) +{ + return native_setup_msi_irqs(dev, nvec, type); +} + void native_teardown_msi_irq(unsigned int irq) { irq_free_hwirq(irq); } +static void __native_teardown_msi_irq(struct msi_controller *ctrl, + unsigned int irq) +{ + native_teardown_msi_irq(irq); +} + +static struct msi_controller native_msi_ctrl = { + .setup_irqs = __native_setup_msi_irqs, + .teardown_irq = __native_teardown_msi_irq, +}; + +struct msi_controller *pcibios_msi_controller(struct pci_bus *bus) +{ + struct pci_sysdata *sys = bus-sysdata; + + return 
sys-msi_ctrl; +} +struct msi_controller *x86_msi_ctrl = native_msi_ctrl; + #ifdef CONFIG_DMAR_TABLE static int dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask, diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index cfd1b13..edb14dd 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -508,6 +508,7 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) sd = info-sd; sd-domain = domain; + sd-msi_ctrl = x86_msi_ctrl; sd-node = node; sd-companion = device; diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 7b20bcc..4deb240 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -468,6 +468,9 @@ void pcibios_scan_root(int busnum) return; } sd-node = x86_pci_root_bus_node(busnum); +#ifdef CONFIG_PCI_MSI + sd-msi_ctrl = x86_msi_ctrl; +#endif x86_pci_root_bus_resources(busnum, resources); printk(KERN_DEBUG PCI: Probing PCI hardware (bus %02x)\n, busnum); bus = pci_scan_root_bus(NULL, busnum, pci_root_ops, sd, resources); -- 1.7.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v3 16/27] Mips/MSI: Save msi chip in pci sysdata
On Wed, Oct 15, 2014 at 11:07:04AM +0800, Yijing Wang wrote: +static inline struct msi_chip *pci_msi_chip(struct pci_bus *bus) +{ + struct pci_controller *control = (struct pci_controller *)bus->sysdata; bus->sysdata is void * so this cast is unnecessary. Ralf ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH] arm: dma-mapping: fix compilation error when CONFIG_MMU is not present
Hello, adding a dependency to CONFIG_ARM_DMA_USE_IOMMU is probably not a good idea anyways. If you read the Documentation/kbuild/kconfig-language.txt, you will see that: 1. in general, selected options should have no prompt and no dependencies 2. giving ARM_DMA_USE_IOMMU a dependency on MMU will and cannot solve the problem, since ARM_DMA_USE_IOMMU itself is selected by several sources and select will not enforce dependencies to be resolved (to be precise, it will simply ignore them). Thus, CONFIG_MMU will not be enabled (as you can see in your warning). If you want to ensure CONFIG_MMU is enabled when CONFIG_ARM_DMA_USE_IOMMU is selected, the selecting options (e.g. SHMOBILE_IOMMU) should have a dependency on MMU. However, i'm not sure if that's the right way to solve it, since it will hide these options when MMU is not enabled. Making the whole IOMMU subsystem depend on MMU would probably also solve the problem. Is there any situation where it would make sense to use an IOMMU without an MMU? 
Best Regards, Stefan Hengelein 2014-10-23 10:15 GMT+02:00 Marek Szyprowski m.szyprow...@samsung.com: Hello, On 2014-10-21 02:05, Laura Abbott wrote: On 10/20/2014 6:50 AM, Stefan Hengelein wrote: Well, arch/arm/mm/dma-mapping.c: In function ‘__atomic_get_pages’: arch/arm/mm/dma-mapping.c:1265:31: error: ‘atomic_pool’ undeclared (first use in this function) arch/arm/mm/dma-mapping.c: In function ‘__iommu_get_pages’: arch/arm/mm/dma-mapping.c:1275:2: error: implicit declaration of function ‘__in_atomic_pool’ [-Werror=implicit-function-declaration] others: arch/arm/mm/dma-mapping.c: In function ‘__iommu_alloc_remap’: arch/arm/mm/dma-mapping.c:1200:4: error: ‘VM_ARM_DMA_CONSISTENT’ undeclared (first use in this function) arch/arm/mm/dma-mapping.c:1200:4: note: each undeclared identifier is reported only once for each function it appears in arch/arm/mm/dma-mapping.c: In function ‘__iommu_get_pages’: arch/arm/mm/dma-mapping.c:1282:29: error: ‘VM_ARM_DMA_CONSISTENT’ undeclared (first use in this function) arch/arm/mm/dma-mapping.c: In function ‘arm_iommu_free_attrs’: arch/arm/mm/dma-mapping.c:1411:4: error: ‘VM_ARM_DMA_CONSISTENT’ undeclared (first use in this function) i've attached a configuration where it happens. when you place the configuration in your tree and compile the file, the errors should show up. However, within the configuration CONFIG_ARM_DMA_USE_IOMMU is defined and CONFIG_MMU is not. AFAIUI, if both should be enabled together, it should be mapped in Kconfig somewhere, right? There isn't a nice way to fixup the dma consistent flags so I would say this option should just depend on MMU. The question is which option should depend on MMU? Making CONFIG_ARM_DMA_USE_IOMMU to depend on MMU doesn't solve this case. 
It only gives following warning with the mentioned .config: warning: (VIDEO_OMAP3 EXYNOS_IOMMU SHMOBILE_IOMMU IPMMU_VMSA ARM_SMMU) selects ARM_DMA_USE_IOMMU which has unmet direct dependencies (MMU) IMHO the best solution will be to either make the whole IOMMU subsystem to depend on MMU or alternatively, add this dependency to all IOMMU drivers which can be enabled by COMPILE_TEST option (here the issue is caused by SHMOBILE_IOMMU, which otherwise cannot be enabled when no-mmu systems are selected). Best regards -- Marek Szyprowski, PhD Samsung RD Institute Poland ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v3 18/27] MIPS/Xlp: Remove the dead function destroy_irq() to fix build error
Applied, thanks. Ralf ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [RFC PATCH v3 5/7] dma-mapping: detect and configure IOMMU in of_dma_configure
On Mon, Oct 27, 2014 at 11:30:33AM +, Laurent Pinchart wrote: Hi Will, Hey Laurent, On Monday 27 October 2014 10:51:59 Will Deacon wrote: On Tue, Oct 14, 2014 at 01:53:59PM +0100, Laurent Pinchart wrote: On Monday 22 September 2014 18:50:27 Will Deacon wrote: On Mon, Sep 22, 2014 at 10:29:10AM +0100, Thierry Reding wrote: Agreed. I wonder how useful it is to know the set of IOMMU instances that each device can master through. Wouldn't it be more useful to keep a list of master interfaces for each device? The set of IOMMU instances can trivially be derived from that. I'm struggling to think how that would look. What do you mean by `master interfaces' in terms of the code we have in Linux? At the end of the day, the list of IOMMU instances (i.e. iommu_dma_mapping) exists because you and Laurent have use-cases involving devices mastering through multiple IOMMUs. If it doesn't work for you, it might be best for you to send me the patch ;) Just for the record, I've brought up the topic of masters being served by multiple IOMMUs, but don't have a use case for it (yet at least). I do have masters served through multiple streams with separate stream IDs, but all by the same IOMMU. Ok. I spoke to Arnd, David and Joerg at LPC and the consensus was that the DMA-mapping API should *not* be exposed to the details of masters that master through multiple IOMMUs. Instead, that should be abstracted by the device API by exposing that device as a single struct device. I'm not sure to follow you here. Aren't we already exposing masters that master through multiple IOMMUs as single instances of struct device ? Hmm, yes, now you've confused me too! The conclusion was certainly that dma-mapping should not be the one dealing with the I/O topology. Domain allocation would then be an iommu callback (something like -get_default_domain), but the rest of the details weren't fleshed out. Joerg? 
So, that's certainly an area that needs more work and I'll drop the limited support I'd cooked up from this patch set in the next version. How about masters connected to multiple stream IDs of the same IOMMU ? That should still be handled, as I believe that will be a common case. Will ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH linux-next] iommu: add iommu for s390 platform
On Mon, Oct 27, 2014 at 03:32:01PM +0100, Gerald Schaefer wrote: Not sure if I understood the concept of IOMMU domains right. But if this is about having multiple devices in the same domain, so that iommu_ops->map will establish the _same_ DMA mapping on _all_ registered devices, then this should be possible. Yes, this is what domains are about. A domain describes a set of DMA mappings which can be assigned to multiple devices in parallel. We cannot have shared DMA tables because each device gets its own DMA table allocated during device initialization. Is there some hardware reason for this or is that just an implementation detail that can be changed. In other words, does the hardware allow to use the same DMA table for multiple devices? But we could just keep all devices from one domain in a list and then call dma_update_trans() for all devices during iommu_ops->map/unmap. This sounds complicated. Note that a device can be assigned to a domain that already has existing mappings. In this case you need to make sure that the new device inherits these mappings (and destroy all old mappings for the device that possibly exist). I think it is much easier to use the same DMA table for all devices in a domain, if the hardware allows that. Joerg ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [RFC PATCH v3 5/7] dma-mapping: detect and configure IOMMU in of_dma_configure
On Mon, Oct 27, 2014 at 04:02:16PM +0000, Will Deacon wrote: On Mon, Oct 27, 2014 at 11:30:33AM +0000, Laurent Pinchart wrote: I'm not sure to follow you here. Aren't we already exposing masters that master through multiple IOMMUs as single instances of struct device ? Hmm, yes, now you've confused me too! The conclusion was certainly that dma-mapping should not be the one dealing with the I/O topology. Domain allocation would then be an iommu callback (something like ->get_default_domain), but the rest of the details weren't fleshed out. The idea is that the IOMMU core code will allocate a default domain for each iommu-group at initialization time. This domain can be requested later by a new iommu-api function and used for DMA-API mappings. A device still can be assigned to another domain by driver code (like VFIO). But if the device is later de-assigned the IOMMU core-code automatically puts it back into the default domain. Joerg ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH linux-next] iommu: add iommu for s390 platform
On Thu, 23 Oct 2014 16:04:37 +0200 Frank Blaschka blasc...@linux.vnet.ibm.com wrote: On Thu, Oct 23, 2014 at 02:41:15PM +0200, Joerg Roedel wrote: On Wed, Oct 22, 2014 at 05:43:20PM +0200, Frank Blaschka wrote: Basically there are no limitations. Depending on the s390 maschine generation a device starts its IOVA at a specific address (announced by the HW). But as I already told each device starts at the same address. I think this prevents having multiple devices on the same IOMMU domain. Why, each device has its own IOVA address space, so IOVA A could map to physical address X for one device and to Y for another, no? And if you point multiple devices to the same dma_table they share the mappings (and thus the address space). Or am I getting something wrong? yes, you are absolutely right. There is a per-device dma_table. There is no general IOMMU device but each pci device has its own IOMMU translation capability. I see, in this way it is similar to ARM where there is often also one IOMMU per master device. Is there a possibility the IOMMU domain can support e.g. something like VIOA 0x1 - pci device 1 VIOA 0x1 - pci device 2 A domain is basically an abstraction for a DMA page table (or a dma_table, as you call it on s390). So you can easily create similar mappings for more than one device with it. ok, maybe I was too close to the existing s390 dma implementation or simply wrong, maybe Sebastian or Gerald can give more background Not sure if I understood the concept of IOMMU domains right. But if this is about having multiple devices in the same domain, so that iommu_ops-map will establish the _same_ DMA mapping on _all_ registered devices, then this should be possible. We cannot have shared DMA tables because each device gets its own DMA table allocated during device initialization. But we could just keep all devices from one domain in a list and then call dma_update_trans() for all devices during iommu_ops-map/unmap. 
Gerald ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH linux-next] iommu: add iommu for s390 platform
On Mon, 27 Oct 2014 17:25:02 +0100 Joerg Roedel j...@8bytes.org wrote: On Mon, Oct 27, 2014 at 03:32:01PM +0100, Gerald Schaefer wrote: Not sure if I understood the concept of IOMMU domains right. But if this is about having multiple devices in the same domain, so that iommu_ops-map will establish the _same_ DMA mapping on _all_ registered devices, then this should be possible. Yes, this is what domains are about. A domain describes a set of DMA mappings which can be assigned to multiple devices in parallel. We cannot have shared DMA tables because each device gets its own DMA table allocated during device initialization. Is there some hardware reason for this or is that just an implementation detail that can be changed. In other words, does the hardware allow to use the same DMA table for multiple devices? Yes, the HW would allow shared DMA tables, but the implementation would need some non-trivial changes. For example, we have a per-device spin_lock for DMA table manipulations and the code in arch/s390/pci/pci_dma.c knows nothing about IOMMU domains or shared DMA tables, it just implements a set of dma_map_ops. Of course this would also go horribly wrong if a device was already in use (via the current dma_map_ops), but I guess using devices through the IOMMU_API prevents using them otherwise? But we could just keep all devices from one domain in a list and then call dma_update_trans() for all devices during iommu_ops-map/unmap. This sounds complicated. Note that a device can be assigned to a domain that already has existing mappings. In this case you need to make sure that the new device inherits these mappings (and destroy all old mappings for the device that possibly exist). I think it is much easier to use the same DMA table for all devices in a domain, if the hardware allows that. Yes, in this case, having one DMA table per domain and sharing it between all devices in that domain sounds like a good idea. 
However, I can't think of any use case for this, and Frank probably had a very special use case in mind where this scenario doesn't appear, hence the one device per domain restriction. So, if having multiple devices per domain is a must, then we probably need a thorough rewrite of the arch/s390/pci/pci_dma.c code. Gerald ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH linux-next] iommu: add iommu for s390 platform
On Mon, Oct 27, 2014 at 06:02:19PM +0100, Gerald Schaefer wrote: On Mon, 27 Oct 2014 17:25:02 +0100 Joerg Roedel j...@8bytes.org wrote: Is there some hardware reason for this or is that just an implementation detail that can be changed. In other words, does the hardware allow to use the same DMA table for multiple devices? Yes, the HW would allow shared DMA tables, but the implementation would need some non-trivial changes. For example, we have a per-device spin_lock for DMA table manipulations and the code in arch/s390/pci/pci_dma.c knows nothing about IOMMU domains or shared DMA tables, it just implements a set of dma_map_ops. I think it would make sense to move the DMA table handling code and the dma_map_ops implementation to the IOMMU driver too. This is also how some other IOMMU drivers implement it. The plan is to consolidate the dma_ops implementations someday and have a common implementation that works with all IOMMU drivers across architectures. This would benefit s390 as well and obsoletes the driver specific dma_ops implementation. Of course this would also go horribly wrong if a device was already in use (via the current dma_map_ops), but I guess using devices through the IOMMU_API prevents using them otherwise? This is taken care of by the device drivers. A driver for a device either uses the DMA-API or does its own management of DMA mappings using the IOMMU-API. VFIO is an example for the later case. I think it is much easier to use the same DMA table for all devices in a domain, if the hardware allows that. Yes, in this case, having one DMA table per domain and sharing it between all devices in that domain sounds like a good idea. However, I can't think of any use case for this, and Frank probably had a very special use case in mind where this scenario doesn't appear, hence the one device per domain restriction. One usecase is device access from user-space via VFIO. 
A userspace process might want to access multiple devices at the same time and VFIO would implement this by assigning all of these devices to the same IOMMU domain. This requirement also comes also from the IOMMU-API itself. The intention of the API is to make different IOMMUs look the same through the API, and this is violated when drivers implement a 1-1 domain-device mapping. So, if having multiple devices per domain is a must, then we probably need a thorough rewrite of the arch/s390/pci/pci_dma.c code. Yes, this is a requirement for new IOMMU drivers. We already have drivers implementing the same 1-1 relation and we are about to fix them. But I don't want to add new drivers doing the same. Joerg ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v2 0/6] vfio: type1: support for ARM SMMUS with VFIO_IOMMU_TYPE1
This patch series makes the VFIO_IOMMU_TYPE1 driver buildable on ARM, so it may be used with ARM SMMUs. It also adds support for the IOMMU_NOEXEC flag supported by SMMUs adhering to the ARM SMMU specification so the VFIO user can specify whether the target memory can be executed by the device behind the SMMU. Changes from v1: - Bugfixes and corrected some typos - Use enum for VFIO IOMMU driver capabilities Antonios Motakis (6): vfio: implement iommu driver capabilities with an enum vfio: type1: support for platform bus devices on ARM vfio: introduce the VFIO_DMA_MAP_FLAG_NOEXEC flag vfio: type1: replace domain wide protection flags with supported capabilities vfio: type1: replace vfio_domains_have_iommu_cache with generic function vfio: type1: implement the VFIO_DMA_MAP_FLAG_NOEXEC flag drivers/vfio/Kconfig| 2 +- drivers/vfio/vfio_iommu_type1.c | 85 - include/uapi/linux/vfio.h | 27 +++-- 3 files changed, 76 insertions(+), 38 deletions(-) -- 2.1.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v2 3/6] vfio: introduce the VFIO_DMA_MAP_FLAG_NOEXEC flag
We introduce the VFIO_DMA_MAP_FLAG_NOEXEC flag to the VFIO dma map call, and expose its availability via the capability VFIO_DMA_NOEXEC_IOMMU. This way the user can control whether the XN flag will be set on the requested mappings. The IOMMU_NOEXEC flag needs to be available for all the IOMMUs of the container used. Signed-off-by: Antonios Motakis a.mota...@virtualopensystems.com --- include/uapi/linux/vfio.h | 6 ++ 1 file changed, 6 insertions(+) diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 1e39842..06d66c9 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -30,6 +30,7 @@ enum vfio_iommu_cap { VFIO_DMA_CC_IOMMU = 4, /* IOMMU enforces DMA cache coherence (ex. PCIe NoSnoop stripping) */ VFIO_EEH= 5, /* Check if EEH is supported */ + VFIO_DMA_NOEXEC_IOMMU = 6, }; /* @@ -394,12 +395,17 @@ struct vfio_iommu_type1_info { * * Map process virtual addresses to IO virtual addresses using the * provided struct vfio_dma_map. Caller sets argsz. READ / WRITE required. + * + * To use the VFIO_DMA_MAP_FLAG_NOEXEC flag, the container must support the + * VFIO_DMA_NOEXEC_IOMMU capability. If mappings are created using this flag, + * any groups subsequently added to the container must support this capability. */ struct vfio_iommu_type1_dma_map { __u32 argsz; __u32 flags; #define VFIO_DMA_MAP_FLAG_READ (1 0)/* readable from device */ #define VFIO_DMA_MAP_FLAG_WRITE (1 1) /* writable from device */ +#define VFIO_DMA_MAP_FLAG_NOEXEC (1 2) /* not executable from device */ __u64 vaddr; /* Process virtual address */ __u64 iova; /* IO virtual address */ __u64 size; /* Size of mapping (bytes) */ -- 2.1.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v2 6/6] vfio: type1: implement the VFIO_DMA_MAP_FLAG_NOEXEC flag
Some IOMMU drivers, such as the ARM SMMU driver, make available the IOMMU_NOEXEC flag to set the page tables for a device as XN (execute never). This affects devices such as the ARM PL330 DMA Controller, which respects this flag and will refuse to fetch DMA instructions from memory where the XN flag has been set. The flag can be used only if all IOMMU domains behind the container support the IOMMU_NOEXEC flag. Also, if any mappings are created with the flag, any new domains with devices will have to support it as well. Signed-off-by: Antonios Motakis a.mota...@virtualopensystems.com --- drivers/vfio/vfio_iommu_type1.c | 25 - 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 34472e4..c669b5b 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -572,6 +572,12 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu, if (!prot || !size || (size | iova | vaddr) mask) return -EINVAL; + if (map-flags VFIO_DMA_MAP_FLAG_NOEXEC) { + if (!vfio_domains_have_iommu_cap(iommu, IOMMU_CAP_NOEXEC)) + return -EINVAL; + prot |= IOMMU_NOEXEC; + } + /* Don't allow IOVA or virtual address wrap */ if (iova + size - 1 iova || vaddr + size - 1 vaddr) return -EINVAL; @@ -662,6 +668,14 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu, dma = rb_entry(n, struct vfio_dma, node); iova = dma-iova; + /* +* if any of the mappings to be replayed has the NOEXEC flag +* set, then the new iommu domain must support it +*/ + if ((dma-prot IOMMU_NOEXEC) + !(domain-caps IOMMU_CAP_NOEXEC)) + return -EINVAL; + while (iova dma-iova + dma-size) { phys_addr_t phys = iommu_iova_to_phys(d-domain, iova); size_t size; @@ -749,6 +763,9 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, if (iommu_capable(bus, IOMMU_CAP_CACHE_COHERENCY)) domain-caps |= IOMMU_CAP_CACHE_COHERENCY; + if (iommu_capable(bus, IOMMU_CAP_NOEXEC)) + domain-caps |= IOMMU_CAP_NOEXEC; + /* * Try to match an existing 
compatible domain. We don't want to * preclude an IOMMU driver supporting multiple bus_types and being @@ -900,6 +917,11 @@ static long vfio_iommu_type1_ioctl(void *iommu_data, return 0; return vfio_domains_have_iommu_cap(iommu, IOMMU_CAP_CACHE_COHERENCY); + case VFIO_DMA_NOEXEC_IOMMU: + if (!iommu) + return 0; + return vfio_domains_have_iommu_cap(iommu, + IOMMU_CAP_NOEXEC); default: return 0; } @@ -923,7 +945,8 @@ static long vfio_iommu_type1_ioctl(void *iommu_data, } else if (cmd == VFIO_IOMMU_MAP_DMA) { struct vfio_iommu_type1_dma_map map; uint32_t mask = VFIO_DMA_MAP_FLAG_READ | - VFIO_DMA_MAP_FLAG_WRITE; + VFIO_DMA_MAP_FLAG_WRITE | + VFIO_DMA_MAP_FLAG_NOEXEC; minsz = offsetofend(struct vfio_iommu_type1_dma_map, size); -- 2.1.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v2 2/6] vfio: type1: support for platform bus devices on ARM
This allows making use of the VFIO_IOMMU_TYPE1 driver with platform devices on ARM. The driver can then be used with an Exynos SMMU, or ARM SMMU driver. Signed-off-by: Antonios Motakis a.mota...@virtualopensystems.com --- drivers/vfio/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index d8c5763..a0abe04 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig @@ -16,7 +16,7 @@ config VFIO_SPAPR_EEH menuconfig VFIO tristate "VFIO Non-Privileged userspace driver framework" depends on IOMMU_API - select VFIO_IOMMU_TYPE1 if X86 + select VFIO_IOMMU_TYPE1 if X86 || ARM select VFIO_IOMMU_SPAPR_TCE if (PPC_POWERNV || PPC_PSERIES) select VFIO_SPAPR_EEH if (PPC_POWERNV || PPC_PSERIES) select ANON_INODES -- 2.1.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v2 1/6] vfio: implement iommu driver capabilities with an enum
Currently a VFIO driver's IOMMU capabilities are encoded as a series of numerical defines. Replace this with an enum for future maintainability. Signed-off-by: Antonios Motakis a.mota...@virtualopensystems.com --- include/uapi/linux/vfio.h | 21 ++--- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 6612974..1e39842 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -19,19 +19,18 @@ /* Kernel User level defines for VFIO IOCTLs. */ -/* Extensions */ - -#define VFIO_TYPE1_IOMMU 1 -#define VFIO_SPAPR_TCE_IOMMU 2 -#define VFIO_TYPE1v2_IOMMU 3 /* - * IOMMU enforces DMA cache coherence (ex. PCIe NoSnoop stripping). This - * capability is subject to change as groups are added or removed. + * Capabilities exposed by the VFIO IOMMU driver. Some capabilities are subject + * to change as groups are added or removed. */ -#define VFIO_DMA_CC_IOMMU 4 - -/* Check if EEH is supported */ -#define VFIO_EEH 5 +enum vfio_iommu_cap { + VFIO_TYPE1_IOMMU= 1, + VFIO_SPAPR_TCE_IOMMU= 2, + VFIO_TYPE1v2_IOMMU = 3, + VFIO_DMA_CC_IOMMU = 4, /* IOMMU enforces DMA cache coherence + (ex. PCIe NoSnoop stripping) */ + VFIO_EEH= 5, /* Check if EEH is supported */ +}; /* * The IOCTL interface is designed for extensibility by embedding the -- 2.1.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v2 5/6] vfio: type1: replace vfio_domains_have_iommu_cache with generic function
Replace the function vfio_domains_have_iommu_cache() with a more generic function vfio_domains_have_iommu_cap() which allows to check all domains of an vfio_iommu structure for a given cached capability. Signed-off-by: Antonios Motakis a.mota...@virtualopensystems.com --- drivers/vfio/vfio_iommu_type1.c | 37 +++-- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index aefb3c0..34472e4 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -80,6 +80,23 @@ struct vfio_group { struct list_headnext; }; +static int vfio_domains_have_iommu_cap(struct vfio_iommu *iommu, int cap) +{ + struct vfio_domain *domain; + int ret = 1; + + mutex_lock(iommu-lock); + list_for_each_entry(domain, iommu-domain_list, next) { + if (!(domain-caps cap)) { + ret = 0; + break; + } + } + mutex_unlock(iommu-lock); + + return ret; +} + /* * This code handles mapping and unmapping of user data buffers * into DMA'ble space using the IOMMU @@ -867,23 +884,6 @@ static void vfio_iommu_type1_release(void *iommu_data) kfree(iommu); } -static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu) -{ - struct vfio_domain *domain; - int ret = 1; - - mutex_lock(iommu-lock); - list_for_each_entry(domain, iommu-domain_list, next) { - if (!(domain-caps IOMMU_CAP_CACHE_COHERENCY)) { - ret = 0; - break; - } - } - mutex_unlock(iommu-lock); - - return ret; -} - static long vfio_iommu_type1_ioctl(void *iommu_data, unsigned int cmd, unsigned long arg) { @@ -898,7 +898,8 @@ static long vfio_iommu_type1_ioctl(void *iommu_data, case VFIO_DMA_CC_IOMMU: if (!iommu) return 0; - return vfio_domains_have_iommu_cache(iommu); + return vfio_domains_have_iommu_cap(iommu, + IOMMU_CAP_CACHE_COHERENCY); default: return 0; } -- 2.1.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v2 4/6] vfio: type1: replace domain wide protection flags with supported capabilities
VFIO_IOMMU_TYPE1 keeps track for each domain it knows a list of protection flags it always applies to all mappings in the domain. This is used for domains that support IOMMU_CAP_CACHE_COHERENCY. Refactor this slightly, by keeping track instead that a given domain supports the capability, and applying the IOMMU_CACHE protection flag when doing the actual DMA mappings. This will allow us to reuse the behavior for IOMMU_CAP_NOEXEC, which we also want to keep track of, but without applying it to all domains that support it unless the user explicitly requests it. Signed-off-by: Antonios Motakis a.mota...@virtualopensystems.com --- drivers/vfio/vfio_iommu_type1.c | 25 + 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 562f686..aefb3c0 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -64,7 +64,7 @@ struct vfio_domain { struct iommu_domain *domain; struct list_headnext; struct list_headgroup_list; - int prot; /* IOMMU_CACHE */ + int caps; }; struct vfio_dma { @@ -485,7 +485,7 @@ static int map_try_harder(struct vfio_domain *domain, dma_addr_t iova, for (i = 0; i npage; i++, pfn++, iova += PAGE_SIZE) { ret = iommu_map(domain-domain, iova, (phys_addr_t)pfn PAGE_SHIFT, - PAGE_SIZE, prot | domain-prot); + PAGE_SIZE, prot); if (ret) break; } @@ -503,11 +503,16 @@ static int vfio_iommu_map(struct vfio_iommu *iommu, dma_addr_t iova, int ret; list_for_each_entry(d, iommu-domain_list, next) { + int dprot = prot; + + if (d-caps IOMMU_CAP_CACHE_COHERENCY) + dprot |= IOMMU_CACHE; + ret = iommu_map(d-domain, iova, (phys_addr_t)pfn PAGE_SHIFT, - npage PAGE_SHIFT, prot | d-prot); + npage PAGE_SHIFT, dprot); if (ret) { if (ret != -EBUSY || - map_try_harder(d, iova, pfn, npage, prot)) + map_try_harder(d, iova, pfn, npage, dprot)) goto unwind; } } @@ -620,6 +625,10 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu, struct vfio_domain *d; struct rb_node *n; int 
ret; + int dprot = 0; + + if (domain-caps IOMMU_CAP_CACHE_COHERENCY) + dprot |= IOMMU_CACHE; /* Arbitrarily pick the first domain in the list for lookups */ d = list_first_entry(iommu-domain_list, struct vfio_domain, next); @@ -653,7 +662,7 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu, size += PAGE_SIZE; ret = iommu_map(domain-domain, iova, phys, - size, dma-prot | domain-prot); + size, dma-prot | dprot); if (ret) return ret; @@ -721,7 +730,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, } if (iommu_capable(bus, IOMMU_CAP_CACHE_COHERENCY)) - domain-prot |= IOMMU_CACHE; + domain-caps |= IOMMU_CAP_CACHE_COHERENCY; /* * Try to match an existing compatible domain. We don't want to @@ -732,7 +741,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, */ list_for_each_entry(d, iommu-domain_list, next) { if (d-domain-ops == domain-domain-ops - d-prot == domain-prot) { + d-caps == domain-caps) { iommu_detach_group(domain-domain, iommu_group); if (!iommu_attach_group(d-domain, iommu_group)) { list_add(group-next, d-group_list); @@ -865,7 +874,7 @@ static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu) mutex_lock(iommu-lock); list_for_each_entry(domain, iommu-domain_list, next) { - if (!(domain-prot IOMMU_CACHE)) { + if (!(domain-caps IOMMU_CAP_CACHE_COHERENCY)) { ret = 0; break; } -- 2.1.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v9 00/19] VFIO support for platform and AMBA devices on ARM
This patch series aims to implement VFIO support for platform devices that reside behind an IOMMU. Examples of such devices are devices behind an ARM SMMU, or behind a Samsung Exynos System MMU. The API used is based on the existing VFIO API that is also used with PCI devices. Only devices that include a basic set of IRQs and memory regions are targeted; devices with complex relationships with other devices on a device tree are not taken into account at this stage. This patch series should be applied at least on the following series/patches: - [PATCH 0/3] iommu: replace IOMMU_EXEC with IOMMU_EXEC and update ARM SMMU driver - [PATCH] driver core: amba: add device binding path 'driver_override' - [PATCH v2 0/6] vfio: type1: support for ARM SMMUS with VFIO_IOMMU_TYPE1 A copy can be cloned from the branch vfio-platform-v9 at: g...@github.com:virtualopensystems/linux-kvm-arm.git Changes since v8: - Separate irq handler for edge and level triggered interrupts - Mutex based lock for VFIO fd open/release - Fixed bug where the first region of a platform device wasn't exposed - Read only regions can be MMAPed only read only - Code cleanups Changes since v7: - Some initial placeholder functionality for PIO resources - Cleaned up code for IRQ triggering, masking and unmasking - Some functionality has been removed from this series and posted separately: - VFIO_IOMMU_TYPE1 support for ARM SMMUs - IOMMU NOEXEC patches - driver_override functionality for AMBA devices - Several fixes Changes since v6: - Integrated support for AMBA devices - Numerous cleanups and fixes Changes since v5: - Full eventfd support for IRQ masking and unmasking. - Changed IOMMU_EXEC to IOMMU_NOEXEC, along with related flags in VFIO. - Other fixes based on reviewer comments. 
Changes since v4: - Use static offsets for each region in the VFIO device fd - Include patch in the series for the ARM SMMU to expose IOMMU_EXEC availability via IOMMU_CAP_DMA_EXEC - Rebased on VFIO multi domain support: - IOMMU_EXEC is now available if at least one IOMMU in the container supports it - Expose IOMMU_EXEC if available via the capability VFIO_IOMMU_PROT_EXEC - Some bug fixes Changes since v3: - Use Kim Phillips' driver_probe_device() Changes since v2: - Fixed Read/Write and MMAP on device regions - Removed dependency on Device Tree - Interrupts support - Interrupt masking/unmasking - Automask level sensitive interrupts - Introduced VFIO_DMA_MAP_FLAG_EXEC - Code clean ups Antonios Motakis (19): vfio/platform: initial skeleton of VFIO support for platform devices vfio: platform: probe to devices on the platform bus vfio: platform: add the VFIO PLATFORM module to Kconfig vfio: amba: VFIO support for AMBA devices vfio: amba: add the VFIO for AMBA devices module to Kconfig vfio/platform: return info for bound device vfio/platform: return info for device memory mapped IO regions vfio/platform: read and write support for the device fd vfio/platform: support MMAP of MMIO regions vfio/platform: return IRQ info vfio/platform: initial interrupts support code vfio/platform: trigger an interrupt via eventfd vfio/platform: support for level sensitive interrupts vfio: move eventfd support code for VFIO_PCI to a separate file vfio: add local lock in virqfd instead of depending on VFIO PCI vfio: pass an opaque pointer on virqfd initialization vfio: virqfd: add vfio_ prefix to virqfd_enable and virqfd_disable vfio: initialize the virqfd workqueue in VFIO generic code vfio/platform: implement IRQ masking/unmasking via an eventfd drivers/vfio/Kconfig | 1 + drivers/vfio/Makefile | 5 +- drivers/vfio/pci/vfio_pci.c | 8 - drivers/vfio/pci/vfio_pci_intrs.c | 238 +--- drivers/vfio/pci/vfio_pci_private.h | 3 - drivers/vfio/platform/Kconfig | 19 + drivers/vfio/platform/Makefile| 
8 + drivers/vfio/platform/vfio_amba.c | 116 ++ drivers/vfio/platform/vfio_platform.c | 114 ++ drivers/vfio/platform/vfio_platform_common.c | 529 ++ drivers/vfio/platform/vfio_platform_irq.c | 337 drivers/vfio/platform/vfio_platform_private.h | 81 drivers/vfio/vfio.c | 8 + drivers/vfio/virqfd.c | 215 +++ include/linux/vfio.h | 27 ++ include/uapi/linux/vfio.h | 2 + 16 files changed, 1475 insertions(+), 236 deletions(-) create mode 100644 drivers/vfio/platform/Kconfig create mode 100644 drivers/vfio/platform/Makefile create mode 100644 drivers/vfio/platform/vfio_amba.c create mode 100644 drivers/vfio/platform/vfio_platform.c create mode 100644 drivers/vfio/platform/vfio_platform_common.c create mode 100644 drivers/vfio/platform/vfio_platform_irq.c create mode 100644
[PATCH v9 01/19] vfio/platform: initial skeleton of VFIO support for platform devices
This patch forms the common skeleton code for platform devices support with VFIO. This will include the core functionality of VFIO_PLATFORM, however binding to the device and discovering the device resources will be done with the help of a separate file where any Linux platform bus specific code will reside. This will allow us to implement support for also discovering AMBA devices and their resources, but still reuse a large part of the VFIO_PLATFORM implementation. Signed-off-by: Antonios Motakis a.mota...@virtualopensystems.com --- drivers/vfio/platform/vfio_platform_common.c | 126 ++ drivers/vfio/platform/vfio_platform_private.h | 36 2 files changed, 162 insertions(+) create mode 100644 drivers/vfio/platform/vfio_platform_common.c create mode 100644 drivers/vfio/platform/vfio_platform_private.h diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c new file mode 100644 index 000..e0fdbc8 --- /dev/null +++ b/drivers/vfio/platform/vfio_platform_common.c @@ -0,0 +1,126 @@ +/* + * Copyright (C) 2013 - Virtual Open Systems + * Author: Antonios Motakis a.mota...@virtualopensystems.com + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include linux/device.h +#include linux/interrupt.h +#include linux/iommu.h +#include linux/module.h +#include linux/mutex.h +#include linux/notifier.h +#include linux/pm_runtime.h +#include linux/slab.h +#include linux/types.h +#include linux/uaccess.h +#include linux/vfio.h +#include linux/io.h + +#include vfio_platform_private.h + +static void vfio_platform_release(void *device_data) +{ + module_put(THIS_MODULE); +} + +static int vfio_platform_open(void *device_data) +{ + if (!try_module_get(THIS_MODULE)) + return -ENODEV; + + return 0; +} + +static long vfio_platform_ioctl(void *device_data, + unsigned int cmd, unsigned long arg) +{ + if (cmd == VFIO_DEVICE_GET_INFO) + return -EINVAL; + + else if (cmd == VFIO_DEVICE_GET_REGION_INFO) + return -EINVAL; + + else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) + return -EINVAL; + + else if (cmd == VFIO_DEVICE_SET_IRQS) + return -EINVAL; + + else if (cmd == VFIO_DEVICE_RESET) + return -EINVAL; + + return -ENOTTY; +} + +static ssize_t vfio_platform_read(void *device_data, char __user *buf, + size_t count, loff_t *ppos) +{ + return -EINVAL; +} + +static ssize_t vfio_platform_write(void *device_data, const char __user *buf, + size_t count, loff_t *ppos) +{ + return -EINVAL; +} + +static int vfio_platform_mmap(void *device_data, struct vm_area_struct *vma) +{ + return -EINVAL; +} + +static const struct vfio_device_ops vfio_platform_ops = { + .name = vfio-platform, + .open = vfio_platform_open, + .release= vfio_platform_release, + .ioctl = vfio_platform_ioctl, + .read = vfio_platform_read, + .write = vfio_platform_write, + .mmap = vfio_platform_mmap, +}; + +int vfio_platform_probe_common(struct vfio_platform_device *vdev, + struct device *dev) +{ + struct iommu_group *group; + int ret; + + if (!vdev) + return -EINVAL; + + group = iommu_group_get(dev); + if (!group) { + pr_err(VFIO: No IOMMU group for device %s\n, vdev-name); + return -EINVAL; + } + + ret = vfio_add_group_dev(dev, vfio_platform_ops, vdev); + if (ret) { + 
iommu_group_put(group); + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(vfio_platform_probe_common); + +struct vfio_platform_device *vfio_platform_remove_common(struct device *dev) +{ + struct vfio_platform_device *vdev; + + vdev = vfio_del_group_dev(dev); + if (vdev) + iommu_group_put(dev-iommu_group); + + return vdev; +} +EXPORT_SYMBOL_GPL(vfio_platform_remove_common); diff --git a/drivers/vfio/platform/vfio_platform_private.h b/drivers/vfio/platform/vfio_platform_private.h new file mode 100644 index 000..062b92d --- /dev/null +++ b/drivers/vfio/platform/vfio_platform_private.h @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2013 - Virtual Open Systems + * Author: Antonios Motakis a.mota...@virtualopensystems.com + * + * This program is free software; you can redistribute it and/or modify
[PATCH v9 03/19] vfio: platform: add the VFIO PLATFORM module to Kconfig
Enable building the VFIO PLATFORM driver that allows using Linux platform devices with VFIO. Signed-off-by: Antonios Motakis a.mota...@virtualopensystems.com --- drivers/vfio/Kconfig | 1 + drivers/vfio/Makefile | 1 + drivers/vfio/platform/Kconfig | 9 + drivers/vfio/platform/Makefile | 4 4 files changed, 15 insertions(+) create mode 100644 drivers/vfio/platform/Kconfig create mode 100644 drivers/vfio/platform/Makefile diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index a0abe04..962fb80 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig @@ -27,3 +27,4 @@ menuconfig VFIO If you don't know what to do here, say N. source drivers/vfio/pci/Kconfig +source drivers/vfio/platform/Kconfig diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile index 0b035b1..dadf0ca 100644 --- a/drivers/vfio/Makefile +++ b/drivers/vfio/Makefile @@ -3,3 +3,4 @@ obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o obj-$(CONFIG_VFIO_SPAPR_EEH) += vfio_spapr_eeh.o obj-$(CONFIG_VFIO_PCI) += pci/ +obj-$(CONFIG_VFIO_PLATFORM) += platform/ diff --git a/drivers/vfio/platform/Kconfig b/drivers/vfio/platform/Kconfig new file mode 100644 index 000..c51af17 --- /dev/null +++ b/drivers/vfio/platform/Kconfig @@ -0,0 +1,9 @@ +config VFIO_PLATFORM + tristate "VFIO support for platform devices" + depends on VFIO && EVENTFD && ARM + help + Support for platform devices with VFIO. This is required to make + use of platform devices present on the system using the VFIO + framework. + + If you don't know what to do here, say N. diff --git a/drivers/vfio/platform/Makefile b/drivers/vfio/platform/Makefile new file mode 100644 index 000..279862b --- /dev/null +++ b/drivers/vfio/platform/Makefile @@ -0,0 +1,4 @@ + +vfio-platform-y := vfio_platform.o vfio_platform_common.o + +obj-$(CONFIG_VFIO_PLATFORM) += vfio-platform.o -- 2.1.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v9 04/19] vfio: amba: VFIO support for AMBA devices
Add support for discovering AMBA devices with VFIO and handle them similarly to Linux platform devices. Signed-off-by: Antonios Motakis a.mota...@virtualopensystems.com --- drivers/vfio/platform/vfio_amba.c | 116 ++ include/uapi/linux/vfio.h | 1 + 2 files changed, 117 insertions(+) create mode 100644 drivers/vfio/platform/vfio_amba.c diff --git a/drivers/vfio/platform/vfio_amba.c b/drivers/vfio/platform/vfio_amba.c new file mode 100644 index 000..cf61324 --- /dev/null +++ b/drivers/vfio/platform/vfio_amba.c @@ -0,0 +1,116 @@ +/* + * Copyright (C) 2013 - Virtual Open Systems + * Author: Antonios Motakis a.mota...@virtualopensystems.com + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include linux/device.h +#include linux/interrupt.h +#include linux/iommu.h +#include linux/module.h +#include linux/mutex.h +#include linux/notifier.h +#include linux/pm_runtime.h +#include linux/slab.h +#include linux/types.h +#include linux/uaccess.h +#include linux/vfio.h +#include linux/io.h +#include linux/irq.h +#include linux/amba/bus.h + +#include vfio_platform_private.h + +#define DRIVER_VERSION 0.9 +#define DRIVER_AUTHOR Antonios Motakis a.mota...@virtualopensystems.com +#define DRIVER_DESC VFIO for AMBA devices - User Level meta-driver + +/* probing devices from the AMBA bus */ + +static struct resource *get_amba_resource(struct vfio_platform_device *vdev, + int i) +{ + struct amba_device *adev = (struct amba_device *) vdev-opaque; + + if (i == 0) + return adev-res; + + return NULL; +} + +static int get_amba_irq(struct vfio_platform_device *vdev, int i) +{ + struct amba_device *adev = (struct amba_device *) vdev-opaque; + + if (i AMBA_NR_IRQS) + return adev-irq[i]; + + return 0; +} + +static int vfio_amba_probe(struct amba_device *adev, const struct amba_id *id) +{ + + struct vfio_platform_device *vdev; + int ret; + + vdev = kzalloc(sizeof(*vdev), GFP_KERNEL); + if (!vdev) + return -ENOMEM; + + vdev-opaque = (void *) adev; + vdev-name = vfio-amba-dev; + vdev-flags = VFIO_DEVICE_FLAGS_AMBA; + vdev-get_resource = get_amba_resource; + vdev-get_irq = get_amba_irq; + + ret = vfio_platform_probe_common(vdev, adev-dev); + if (ret) + kfree(vdev); + + return ret; +} + +static int vfio_amba_remove(struct amba_device *adev) +{ + struct vfio_platform_device *vdev; + + vdev = vfio_platform_remove_common(adev-dev); + if(vdev) { + kfree(vdev); + return 0; + } + + return -EINVAL; +} + +static struct amba_id pl330_ids[] = { + { 0, 0 }, +}; + +MODULE_DEVICE_TABLE(amba, pl330_ids); + +static struct amba_driver vfio_amba_driver = { + .probe = vfio_amba_probe, + .remove = vfio_amba_remove, + .id_table = pl330_ids, + .drv = { + .name = vfio-amba, + .owner = 
THIS_MODULE, + }, +}; + +module_amba_driver(vfio_amba_driver); + +MODULE_VERSION(DRIVER_VERSION); +MODULE_LICENSE(GPL v2); +MODULE_AUTHOR(DRIVER_AUTHOR); +MODULE_DESCRIPTION(DRIVER_DESC); diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 9db1056..92469e0 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -158,6 +158,7 @@ struct vfio_device_info { #define VFIO_DEVICE_FLAGS_RESET(1 0)/* Device supports reset */ #define VFIO_DEVICE_FLAGS_PCI (1 1)/* vfio-pci device */ #define VFIO_DEVICE_FLAGS_PLATFORM (1 2)/* vfio-platform device */ +#define VFIO_DEVICE_FLAGS_AMBA (1 3) /* vfio-amba device */ __u32 num_regions;/* Max region index + 1 */ __u32 num_irqs; /* Max IRQ index + 1 */ }; -- 2.1.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v9 06/19] vfio/platform: return info for bound device
A VFIO userspace driver will start by opening the VFIO device that corresponds to an IOMMU group, and will use the ioctl interface to get the basic device info, such as number of memory regions and interrupts, and their properties. This patch enables the VFIO_DEVICE_GET_INFO ioctl call. Signed-off-by: Antonios Motakis a.mota...@virtualopensystems.com --- drivers/vfio/platform/vfio_platform_common.c | 23 --- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c index e0fdbc8..cb20526 100644 --- a/drivers/vfio/platform/vfio_platform_common.c +++ b/drivers/vfio/platform/vfio_platform_common.c @@ -43,10 +43,27 @@ static int vfio_platform_open(void *device_data) static long vfio_platform_ioctl(void *device_data, unsigned int cmd, unsigned long arg) { - if (cmd == VFIO_DEVICE_GET_INFO) - return -EINVAL; + struct vfio_platform_device *vdev = device_data; + unsigned long minsz; + + if (cmd == VFIO_DEVICE_GET_INFO) { + struct vfio_device_info info; + + minsz = offsetofend(struct vfio_device_info, num_irqs); + + if (copy_from_user(info, (void __user *)arg, minsz)) + return -EFAULT; + + if (info.argsz minsz) + return -EINVAL; + + info.flags = vdev-flags; + info.num_regions = 0; + info.num_irqs = 0; + + return copy_to_user((void __user *)arg, info, minsz); - else if (cmd == VFIO_DEVICE_GET_REGION_INFO) + } else if (cmd == VFIO_DEVICE_GET_REGION_INFO) return -EINVAL; else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) -- 2.1.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v9 05/19] vfio: amba: add the VFIO for AMBA devices module to Kconfig
Enable building the VFIO AMBA driver. VFIO_AMBA depends on VFIO_PLATFORM, since it is sharing a portion of the code, and it is essentially implemented as a platform device whose resources are discovered via AMBA specific APIs in the kernel. Signed-off-by: Antonios Motakis a.mota...@virtualopensystems.com --- drivers/vfio/platform/Kconfig | 10 ++ drivers/vfio/platform/Makefile | 4 2 files changed, 14 insertions(+) diff --git a/drivers/vfio/platform/Kconfig b/drivers/vfio/platform/Kconfig index c51af17..c0a3bff 100644 --- a/drivers/vfio/platform/Kconfig +++ b/drivers/vfio/platform/Kconfig @@ -7,3 +7,13 @@ config VFIO_PLATFORM framework. If you don't know what to do here, say N. + +config VFIO_AMBA + tristate "VFIO support for AMBA devices" + depends on VFIO_PLATFORM && ARM_AMBA + help + Support for ARM AMBA devices with VFIO. This is required to make + use of ARM AMBA devices present on the system using the VFIO + framework. + + If you don't know what to do here, say N. diff --git a/drivers/vfio/platform/Makefile b/drivers/vfio/platform/Makefile index 279862b..1957170 100644 --- a/drivers/vfio/platform/Makefile +++ b/drivers/vfio/platform/Makefile @@ -2,3 +2,7 @@ vfio-platform-y := vfio_platform.o vfio_platform_common.o obj-$(CONFIG_VFIO_PLATFORM) += vfio-platform.o + +vfio-amba-y := vfio_amba.o + +obj-$(CONFIG_VFIO_AMBA) += vfio-amba.o -- 2.1.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v9 08/19] vfio/platform: read and write support for the device fd
VFIO returns a file descriptor which we can use to manipulate the memory regions of the device. Usually, the user will mmap memory regions that are addressable on page boundaries, however for memory regions where this is not the case we cannot provide mmap functionality due to security concerns. For this reason we also need allow to read and write to the memory regions via the file descriptor. Implement this funcionality only for MMIO regions of platform devices; PIO regions are not being handled at this point. Signed-off-by: Antonios Motakis a.mota...@virtualopensystems.com --- drivers/vfio/platform/vfio_platform_common.c | 150 ++ drivers/vfio/platform/vfio_platform_private.h | 1 + 2 files changed, 151 insertions(+) diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c index 82de752..e10a8d0 100644 --- a/drivers/vfio/platform/vfio_platform_common.c +++ b/drivers/vfio/platform/vfio_platform_common.c @@ -55,6 +55,10 @@ static int vfio_platform_regions_init(struct vfio_platform_device *vdev) switch (resource_type(res)) { case IORESOURCE_MEM: vdev-regions[i].type = VFIO_PLATFORM_REGION_TYPE_MMIO; + vdev-regions[i].flags |= VFIO_REGION_INFO_FLAG_READ; + if (!(res-flags IORESOURCE_READONLY)) + vdev-regions[i].flags |= + VFIO_REGION_INFO_FLAG_WRITE; break; case IORESOURCE_IO: vdev-regions[i].type = VFIO_PLATFORM_REGION_TYPE_PIO; @@ -74,6 +78,11 @@ err: static void vfio_platform_regions_cleanup(struct vfio_platform_device *vdev) { + int i; + + for (i = 0; i vdev-num_regions; i++) + iounmap(vdev-regions[i].ioaddr); + vdev-num_regions = 0; kfree(vdev-regions); } @@ -176,15 +185,156 @@ static long vfio_platform_ioctl(void *device_data, return -ENOTTY; } +static ssize_t vfio_platform_read_mmio(struct vfio_platform_region reg, + char __user *buf, size_t count, + loff_t off) +{ + unsigned int done = 0; + + if (!reg.ioaddr) { + reg.ioaddr = + ioremap_nocache(reg.addr, reg.size); + + if (!reg.ioaddr) + return -ENOMEM; + } + + 
while (count) { + size_t filled; + + if (count = 4 !(off % 4)) { + u32 val; + + val = ioread32(reg.ioaddr + off); + if (copy_to_user(buf, val, 4)) + goto err; + + filled = 4; + } else if (count = 2 !(off % 2)) { + u16 val; + + val = ioread16(reg.ioaddr + off); + if (copy_to_user(buf, val, 2)) + goto err; + + filled = 2; + } else { + u8 val; + + val = ioread8(reg.ioaddr + off); + if (copy_to_user(buf, val, 1)) + goto err; + + filled = 1; + } + + + count -= filled; + done += filled; + off += filled; + buf += filled; + } + + return done; +err: + return -EFAULT; +} + static ssize_t vfio_platform_read(void *device_data, char __user *buf, size_t count, loff_t *ppos) { + struct vfio_platform_device *vdev = device_data; + unsigned int index = VFIO_PLATFORM_OFFSET_TO_INDEX(*ppos); + loff_t off = *ppos VFIO_PLATFORM_OFFSET_MASK; + + if (index = vdev-num_regions) + return -EINVAL; + + if (!(vdev-regions[index].flags VFIO_REGION_INFO_FLAG_READ)) + return -EINVAL; + + if (vdev-regions[index].type VFIO_PLATFORM_REGION_TYPE_MMIO) + return vfio_platform_read_mmio(vdev-regions[index], + buf, count, off); + else if (vdev-regions[index].type VFIO_PLATFORM_REGION_TYPE_PIO) + return -EINVAL; /* not implemented */ + return -EINVAL; } +static ssize_t vfio_platform_write_mmio(struct vfio_platform_region reg, + const char __user *buf, size_t count, + loff_t off) +{ + unsigned int done = 0; + + if (!reg.ioaddr) { + reg.ioaddr = + ioremap_nocache(reg.addr, reg.size); + + if (!reg.ioaddr) + return -ENOMEM; + } + + while (count) { + size_t filled; + + if (count = 4 !(off % 4)) { +
[PATCH v9 07/19] vfio/platform: return info for device memory mapped IO regions
This patch enables the IOCTLs VFIO_DEVICE_GET_REGION_INFO ioctl call, which allows the user to learn about the available MMIO resources of a device. Signed-off-by: Antonios Motakis a.mota...@virtualopensystems.com --- drivers/vfio/platform/vfio_platform_common.c | 110 +- drivers/vfio/platform/vfio_platform_private.h | 22 ++ 2 files changed, 128 insertions(+), 4 deletions(-) diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c index cb20526..82de752 100644 --- a/drivers/vfio/platform/vfio_platform_common.c +++ b/drivers/vfio/platform/vfio_platform_common.c @@ -27,17 +27,97 @@ #include vfio_platform_private.h +static DEFINE_MUTEX(driver_lock); + +static int vfio_platform_regions_init(struct vfio_platform_device *vdev) +{ + int cnt = 0, i; + + while (vdev-get_resource(vdev, cnt)) + cnt++; + + vdev-regions = kcalloc(cnt, sizeof(struct vfio_platform_region), + GFP_KERNEL); + if (!vdev-regions) + return -ENOMEM; + + for (i = 0; i cnt; i++) { + struct resource *res = + vdev-get_resource(vdev, i); + + if (!res) + goto err; + + vdev-regions[i].addr = res-start; + vdev-regions[i].size = resource_size(res); + vdev-regions[i].flags = 0; + + switch (resource_type(res)) { + case IORESOURCE_MEM: + vdev-regions[i].type = VFIO_PLATFORM_REGION_TYPE_MMIO; + break; + case IORESOURCE_IO: + vdev-regions[i].type = VFIO_PLATFORM_REGION_TYPE_PIO; + break; + default: + goto err; + } + } + + vdev-num_regions = cnt; + + return 0; +err: + kfree(vdev-regions); + return -EINVAL; +} + +static void vfio_platform_regions_cleanup(struct vfio_platform_device *vdev) +{ + vdev-num_regions = 0; + kfree(vdev-regions); +} + static void vfio_platform_release(void *device_data) { + struct vfio_platform_device *vdev = device_data; + + mutex_lock(driver_lock); + + if (!(--vdev-refcnt)) { + vfio_platform_regions_cleanup(vdev); + } + + mutex_unlock(driver_lock); + module_put(THIS_MODULE); } static int vfio_platform_open(void *device_data) { + struct 
vfio_platform_device *vdev = device_data; + int ret; + if (!try_module_get(THIS_MODULE)) return -ENODEV; + mutex_lock(driver_lock); + + if (!vdev-refcnt) { + ret = vfio_platform_regions_init(vdev); + if (ret) + goto err_reg; + } + + vdev-refcnt++; + + mutex_unlock(driver_lock); return 0; + +err_reg: + mutex_unlock(driver_lock); + module_put(THIS_MODULE); + return ret; } static long vfio_platform_ioctl(void *device_data, @@ -58,15 +138,33 @@ static long vfio_platform_ioctl(void *device_data, return -EINVAL; info.flags = vdev-flags; - info.num_regions = 0; + info.num_regions = vdev-num_regions; info.num_irqs = 0; return copy_to_user((void __user *)arg, info, minsz); - } else if (cmd == VFIO_DEVICE_GET_REGION_INFO) - return -EINVAL; + } else if (cmd == VFIO_DEVICE_GET_REGION_INFO) { + struct vfio_region_info info; + + minsz = offsetofend(struct vfio_region_info, offset); + + if (copy_from_user(info, (void __user *)arg, minsz)) + return -EFAULT; - else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) + if (info.argsz minsz) + return -EINVAL; + + if (info.index = vdev-num_regions) + return -EINVAL; + + /* map offset to the physical address */ + info.offset = VFIO_PLATFORM_INDEX_TO_OFFSET(info.index); + info.size = vdev-regions[info.index].size; + info.flags = vdev-regions[info.index].flags; + + return copy_to_user((void __user *)arg, info, minsz); + + } else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) return -EINVAL; else if (cmd == VFIO_DEVICE_SET_IRQS) @@ -134,10 +232,14 @@ struct vfio_platform_device *vfio_platform_remove_common(struct device *dev) { struct vfio_platform_device *vdev; + mutex_lock(driver_lock); + vdev = vfio_del_group_dev(dev); if (vdev) iommu_group_put(dev-iommu_group); + mutex_unlock(driver_lock); + return vdev; } EXPORT_SYMBOL_GPL(vfio_platform_remove_common); diff --git a/drivers/vfio/platform/vfio_platform_private.h
[PATCH v9 10/19] vfio/platform: return IRQ info
Return information for the interrupts exposed by the device. This patch extends VFIO_DEVICE_GET_INFO with the number of IRQs and enables VFIO_DEVICE_GET_IRQ_INFO. Signed-off-by: Antonios Motakis a.mota...@virtualopensystems.com --- drivers/vfio/platform/Makefile| 2 +- drivers/vfio/platform/vfio_platform_common.c | 31 -- drivers/vfio/platform/vfio_platform_irq.c | 59 +++ drivers/vfio/platform/vfio_platform_private.h | 10 + 4 files changed, 97 insertions(+), 5 deletions(-) create mode 100644 drivers/vfio/platform/vfio_platform_irq.c diff --git a/drivers/vfio/platform/Makefile b/drivers/vfio/platform/Makefile index 1957170..81de144 100644 --- a/drivers/vfio/platform/Makefile +++ b/drivers/vfio/platform/Makefile @@ -1,5 +1,5 @@ -vfio-platform-y := vfio_platform.o vfio_platform_common.o +vfio-platform-y := vfio_platform.o vfio_platform_common.o vfio_platform_irq.o obj-$(CONFIG_VFIO_PLATFORM) += vfio-platform.o diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c index aeaaec9..5f2c205 100644 --- a/drivers/vfio/platform/vfio_platform_common.c +++ b/drivers/vfio/platform/vfio_platform_common.c @@ -105,6 +105,7 @@ static void vfio_platform_release(void *device_data) if (!(--vdev-refcnt)) { vfio_platform_regions_cleanup(vdev); + vfio_platform_irq_cleanup(vdev); } mutex_unlock(driver_lock); @@ -126,6 +127,10 @@ static int vfio_platform_open(void *device_data) ret = vfio_platform_regions_init(vdev); if (ret) goto err_reg; + + ret = vfio_platform_irq_init(vdev); + if (ret) + goto err_irq; } vdev-refcnt++; @@ -133,6 +138,8 @@ static int vfio_platform_open(void *device_data) mutex_unlock(driver_lock); return 0; +err_irq: + vfio_platform_regions_cleanup(vdev); err_reg: mutex_unlock(driver_lock); module_put(THIS_MODULE); @@ -158,7 +165,7 @@ static long vfio_platform_ioctl(void *device_data, info.flags = vdev-flags; info.num_regions = vdev-num_regions; - info.num_irqs = 0; + info.num_irqs = vdev-num_irqs; return copy_to_user((void 
__user *)arg, info, minsz); @@ -183,10 +190,26 @@ static long vfio_platform_ioctl(void *device_data, return copy_to_user((void __user *)arg, info, minsz); - } else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) - return -EINVAL; + } else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) { + struct vfio_irq_info info; + + minsz = offsetofend(struct vfio_irq_info, count); + + if (copy_from_user(info, (void __user *)arg, minsz)) + return -EFAULT; + + if (info.argsz minsz) + return -EINVAL; + + if (info.index = vdev-num_irqs) + return -EINVAL; + + info.flags = vdev-irqs[info.index].flags; + info.count = vdev-irqs[info.index].count; + + return copy_to_user((void __user *)arg, info, minsz); - else if (cmd == VFIO_DEVICE_SET_IRQS) + } else if (cmd == VFIO_DEVICE_SET_IRQS) return -EINVAL; else if (cmd == VFIO_DEVICE_RESET) diff --git a/drivers/vfio/platform/vfio_platform_irq.c b/drivers/vfio/platform/vfio_platform_irq.c new file mode 100644 index 000..d99c71c --- /dev/null +++ b/drivers/vfio/platform/vfio_platform_irq.c @@ -0,0 +1,59 @@ +/* + * VFIO platform devices interrupt handling + * + * Copyright (C) 2013 - Virtual Open Systems + * Author: Antonios Motakis a.mota...@virtualopensystems.com + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include linux/device.h +#include linux/eventfd.h +#include linux/interrupt.h +#include linux/iommu.h +#include linux/module.h +#include linux/mutex.h +#include linux/notifier.h +#include linux/pm_runtime.h +#include linux/slab.h +#include linux/types.h +#include linux/uaccess.h +#include linux/vfio.h +#include linux/platform_device.h +#include linux/irq.h + +#include vfio_platform_private.h + +int vfio_platform_irq_init(struct vfio_platform_device *vdev) +{ + int cnt = 0, i; + + while (vdev-get_irq(vdev, cnt) 0) + cnt++; + + vdev-irqs = kcalloc(cnt, sizeof(struct vfio_platform_irq), GFP_KERNEL); + if (!vdev-irqs) + return -ENOMEM; + +
[PATCH v9 12/19] vfio/platform: trigger an interrupt via eventfd
This patch allows to set an eventfd for a patform device's interrupt, and also to trigger the interrupt eventfd from userspace for testing. Level sensitive interrupts are marked as maskable and are handled in a later patch. Edge triggered interrupts are not advertised as maskable and are implemented here using a simple and efficient IRQ handler. Signed-off-by: Antonios Motakis a.mota...@virtualopensystems.com --- drivers/vfio/platform/vfio_platform_irq.c | 93 ++- drivers/vfio/platform/vfio_platform_private.h | 2 + 2 files changed, 93 insertions(+), 2 deletions(-) diff --git a/drivers/vfio/platform/vfio_platform_irq.c b/drivers/vfio/platform/vfio_platform_irq.c index 007b386..2ac8ed7 100644 --- a/drivers/vfio/platform/vfio_platform_irq.c +++ b/drivers/vfio/platform/vfio_platform_irq.c @@ -45,11 +45,91 @@ static int vfio_platform_set_irq_unmask(struct vfio_platform_device *vdev, return -EINVAL; } +static irqreturn_t vfio_irq_handler(int irq, void *dev_id) +{ + struct vfio_platform_irq *irq_ctx = dev_id; + + eventfd_signal(irq_ctx-trigger, 1); + + return IRQ_HANDLED; +} + +static int vfio_set_trigger(struct vfio_platform_device *vdev, int index, + int fd, irq_handler_t handler) +{ + struct vfio_platform_irq *irq = vdev-irqs[index]; + struct eventfd_ctx *trigger; + int ret; + + if (irq-trigger) { + free_irq(irq-hwirq, irq); + kfree(irq-name); + eventfd_ctx_put(irq-trigger); + irq-trigger = NULL; + } + + if (fd 0) /* Disable only */ + return 0; + + irq-name = kasprintf(GFP_KERNEL, vfio-irq[%d](%s), + irq-hwirq, vdev-name); + if (!irq-name) + return -ENOMEM; + + trigger = eventfd_ctx_fdget(fd); + if (IS_ERR(trigger)) { + kfree(irq-name); + return PTR_ERR(trigger); + } + + irq-trigger = trigger; + + ret = request_irq(irq-hwirq, handler, 0, irq-name, irq); + if (ret) { + kfree(irq-name); + eventfd_ctx_put(trigger); + irq-trigger = NULL; + return ret; + } + + return 0; +} + static int vfio_platform_set_irq_trigger(struct vfio_platform_device *vdev, unsigned index, unsigned 
start, unsigned count, uint32_t flags, void *data) { - return -EINVAL; + struct vfio_platform_irq *irq = vdev-irqs[index]; + irq_handler_t handler; + + if (vdev-irqs[index].flags VFIO_IRQ_INFO_MASKABLE) + return -EINVAL; /* not implemented */ + else + handler = vfio_irq_handler; + + if (!count (flags VFIO_IRQ_SET_DATA_NONE)) + return vfio_set_trigger(vdev, index, -1, handler); + + if (start != 0 || count != 1) + return -EINVAL; + + if (flags VFIO_IRQ_SET_DATA_EVENTFD) { + int32_t fd = *(int32_t *)data; + + return vfio_set_trigger(vdev, index, fd, handler); + } + + if (flags VFIO_IRQ_SET_DATA_NONE) { + handler(irq-hwirq, irq); + + } else if (flags VFIO_IRQ_SET_DATA_BOOL) { + uint8_t trigger = *(uint8_t *)data; + + if (trigger) + handler(irq-hwirq, irq); + } + + return 0; } int vfio_platform_set_irqs_ioctl(struct vfio_platform_device *vdev, @@ -95,7 +175,11 @@ int vfio_platform_irq_init(struct vfio_platform_device *vdev) if (hwirq 0) goto err; - vdev-irqs[i].flags = 0; + vdev-irqs[i].flags = VFIO_IRQ_INFO_EVENTFD; + + if (irq_get_trigger_type(hwirq) IRQ_TYPE_LEVEL_MASK) + vdev-irqs[i].flags |= VFIO_IRQ_INFO_MASKABLE; + vdev-irqs[i].count = 1; vdev-irqs[i].hwirq = hwirq; } @@ -110,6 +194,11 @@ err: void vfio_platform_irq_cleanup(struct vfio_platform_device *vdev) { + int i; + + for (i = 0; i vdev-num_irqs; i++) + vfio_set_trigger(vdev, i, -1, NULL); + vdev-num_irqs = 0; kfree(vdev-irqs); } diff --git a/drivers/vfio/platform/vfio_platform_private.h b/drivers/vfio/platform/vfio_platform_private.h index ffa2459..a3f2411 100644 --- a/drivers/vfio/platform/vfio_platform_private.h +++ b/drivers/vfio/platform/vfio_platform_private.h @@ -28,6 +28,8 @@ struct vfio_platform_irq { u32 flags; u32 count; int hwirq; + char*name; + struct eventfd_ctx *trigger; }; struct vfio_platform_region { -- 2.1.1 ___ iommu mailing list iommu@lists.linux-foundation.org
[PATCH v9 11/19] vfio/platform: initial interrupts support code
This patch is a skeleton for the VFIO_DEVICE_SET_IRQS IOCTL, around which most IRQ functionality is implemented in VFIO. Signed-off-by: Antonios Motakis a.mota...@virtualopensystems.com --- drivers/vfio/platform/vfio_platform_common.c | 52 +++-- drivers/vfio/platform/vfio_platform_irq.c | 56 +++ drivers/vfio/platform/vfio_platform_private.h | 6 +++ 3 files changed, 111 insertions(+), 3 deletions(-) diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c index 5f2c205..782b4d3 100644 --- a/drivers/vfio/platform/vfio_platform_common.c +++ b/drivers/vfio/platform/vfio_platform_common.c @@ -209,10 +209,54 @@ static long vfio_platform_ioctl(void *device_data, return copy_to_user((void __user *)arg, info, minsz); - } else if (cmd == VFIO_DEVICE_SET_IRQS) - return -EINVAL; + } else if (cmd == VFIO_DEVICE_SET_IRQS) { + struct vfio_irq_set hdr; + u8 *data = NULL; + int ret = 0; + + minsz = offsetofend(struct vfio_irq_set, count); + + if (copy_from_user(hdr, (void __user *)arg, minsz)) + return -EFAULT; + + if (hdr.argsz minsz) + return -EINVAL; + + if (hdr.index = vdev-num_irqs) + return -EINVAL; + + if (hdr.flags ~(VFIO_IRQ_SET_DATA_TYPE_MASK | + VFIO_IRQ_SET_ACTION_TYPE_MASK)) + return -EINVAL; - else if (cmd == VFIO_DEVICE_RESET) + if (!(hdr.flags VFIO_IRQ_SET_DATA_NONE)) { + size_t size; + + if (hdr.flags VFIO_IRQ_SET_DATA_BOOL) + size = sizeof(uint8_t); + else if (hdr.flags VFIO_IRQ_SET_DATA_EVENTFD) + size = sizeof(int32_t); + else + return -EINVAL; + + if (hdr.argsz - minsz size) + return -EINVAL; + + data = memdup_user((void __user *)(arg + minsz), size); + if (IS_ERR(data)) + return PTR_ERR(data); + } + + mutex_lock(vdev-igate); + + ret = vfio_platform_set_irqs_ioctl(vdev, hdr.flags, hdr.index, + hdr.start, hdr.count, data); + mutex_unlock(vdev-igate); + kfree(data); + + return ret; + + } else if (cmd == VFIO_DEVICE_RESET) return -EINVAL; return -ENOTTY; @@ -462,6 +506,8 @@ int vfio_platform_probe_common(struct 
vfio_platform_device *vdev, return ret; } + mutex_init(vdev-igate); + return 0; } EXPORT_SYMBOL_GPL(vfio_platform_probe_common); diff --git a/drivers/vfio/platform/vfio_platform_irq.c b/drivers/vfio/platform/vfio_platform_irq.c index d99c71c..007b386 100644 --- a/drivers/vfio/platform/vfio_platform_irq.c +++ b/drivers/vfio/platform/vfio_platform_irq.c @@ -31,6 +31,53 @@ #include vfio_platform_private.h +static int vfio_platform_set_irq_mask(struct vfio_platform_device *vdev, + unsigned index, unsigned start, + unsigned count, uint32_t flags, void *data) +{ + return -EINVAL; +} + +static int vfio_platform_set_irq_unmask(struct vfio_platform_device *vdev, + unsigned index, unsigned start, + unsigned count, uint32_t flags, void *data) +{ + return -EINVAL; +} + +static int vfio_platform_set_irq_trigger(struct vfio_platform_device *vdev, +unsigned index, unsigned start, +unsigned count, uint32_t flags, void *data) +{ + return -EINVAL; +} + +int vfio_platform_set_irqs_ioctl(struct vfio_platform_device *vdev, +uint32_t flags, unsigned index, unsigned start, +unsigned count, void *data) +{ + int (*func)(struct vfio_platform_device *vdev, unsigned index, + unsigned start, unsigned count, uint32_t flags, + void *data) = NULL; + + switch (flags VFIO_IRQ_SET_ACTION_TYPE_MASK) { + case VFIO_IRQ_SET_ACTION_MASK: + func = vfio_platform_set_irq_mask; + break; + case VFIO_IRQ_SET_ACTION_UNMASK: + func = vfio_platform_set_irq_unmask; + break; + case VFIO_IRQ_SET_ACTION_TRIGGER: + func = vfio_platform_set_irq_trigger; + break; + } + + if (!func) + return -ENOTTY; + + return func(vdev, index, start, count, flags, data); +} + int
[PATCH v9 19/19] vfio/platform: implement IRQ masking/unmasking via an eventfd
With this patch the VFIO user will be able to set an eventfd that can be used in order to mask and unmask IRQs of platform devices. Signed-off-by: Antonios Motakis a.mota...@virtualopensystems.com --- drivers/vfio/platform/vfio_platform_irq.c | 47 --- drivers/vfio/platform/vfio_platform_private.h | 2 ++ 2 files changed, 45 insertions(+), 4 deletions(-) diff --git a/drivers/vfio/platform/vfio_platform_irq.c b/drivers/vfio/platform/vfio_platform_irq.c index 563abf6..08d400e 100644 --- a/drivers/vfio/platform/vfio_platform_irq.c +++ b/drivers/vfio/platform/vfio_platform_irq.c @@ -45,6 +45,15 @@ static void vfio_platform_mask(struct vfio_platform_irq *irq_ctx) spin_unlock_irqrestore(irq_ctx-lock, flags); } +static int vfio_platform_mask_handler(void *opaque, void *unused) +{ + struct vfio_platform_irq *irq_ctx = opaque; + + vfio_platform_mask(irq_ctx); + + return 0; +} + static int vfio_platform_set_irq_mask(struct vfio_platform_device *vdev, unsigned index, unsigned start, unsigned count, uint32_t flags, void *data) @@ -55,8 +64,18 @@ static int vfio_platform_set_irq_mask(struct vfio_platform_device *vdev, if (!(vdev-irqs[index].flags VFIO_IRQ_INFO_MASKABLE)) return -EINVAL; - if (flags VFIO_IRQ_SET_DATA_EVENTFD) - return -EINVAL; /* not implemented yet */ + if (flags VFIO_IRQ_SET_DATA_EVENTFD) { + int32_t fd = *(int32_t *)data; + + if (fd = 0) + return vfio_virqfd_enable((void *) vdev-irqs[index], + vfio_platform_mask_handler, + NULL, NULL, + vdev-irqs[index].mask, fd); + + vfio_virqfd_disable(vdev-irqs[index].mask); + return 0; + } if (flags VFIO_IRQ_SET_DATA_NONE) { vfio_platform_mask(vdev-irqs[index]); @@ -85,6 +104,15 @@ static void vfio_platform_unmask(struct vfio_platform_irq *irq_ctx) spin_unlock_irqrestore(irq_ctx-lock, flags); } +static int vfio_platform_unmask_handler(void *opaque, void *unused) +{ + struct vfio_platform_irq *irq_ctx = opaque; + + vfio_platform_unmask(irq_ctx); + + return 0; +} + static int vfio_platform_set_irq_unmask(struct 
vfio_platform_device *vdev, unsigned index, unsigned start, unsigned count, uint32_t flags, void *data) @@ -95,8 +123,19 @@ static int vfio_platform_set_irq_unmask(struct vfio_platform_device *vdev, if (!(vdev-irqs[index].flags VFIO_IRQ_INFO_MASKABLE)) return -EINVAL; - if (flags VFIO_IRQ_SET_DATA_EVENTFD) - return -EINVAL; /* not implemented yet */ + if (flags VFIO_IRQ_SET_DATA_EVENTFD) { + int32_t fd = *(int32_t *)data; + + if (fd = 0) + return vfio_virqfd_enable((void *) vdev-irqs[index], + vfio_platform_unmask_handler, + NULL, NULL, + vdev-irqs[index].unmask, + fd); + + vfio_virqfd_disable(vdev-irqs[index].unmask); + return 0; + } if (flags VFIO_IRQ_SET_DATA_NONE) { vfio_platform_unmask(vdev-irqs[index]); diff --git a/drivers/vfio/platform/vfio_platform_private.h b/drivers/vfio/platform/vfio_platform_private.h index d25c4cd..70c7527 100644 --- a/drivers/vfio/platform/vfio_platform_private.h +++ b/drivers/vfio/platform/vfio_platform_private.h @@ -32,6 +32,8 @@ struct vfio_platform_irq { struct eventfd_ctx *trigger; boolmasked; spinlock_t lock; + struct virqfd *unmask; + struct virqfd *mask; }; struct vfio_platform_region { -- 2.1.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v9 14/19] vfio: move eventfd support code for VFIO_PCI to a separate file
The virqfd functionality that is used by VFIO_PCI to implement interrupt masking and unmasking via an eventfd, is generic enough and can be reused by another driver. Move it to a separate file in order to allow the code to be shared. Also properly export virqfd_enable and virqfd_disable in the process. Signed-off-by: Antonios Motakis a.mota...@virtualopensystems.com --- drivers/vfio/Makefile | 4 +- drivers/vfio/pci/vfio_pci_intrs.c | 213 --- drivers/vfio/pci/vfio_pci_private.h | 3 - drivers/vfio/virqfd.c | 214 include/linux/vfio.h| 28 + 5 files changed, 245 insertions(+), 217 deletions(-) create mode 100644 drivers/vfio/virqfd.c diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile index dadf0ca..d798b09 100644 --- a/drivers/vfio/Makefile +++ b/drivers/vfio/Makefile @@ -1,4 +1,6 @@ -obj-$(CONFIG_VFIO) += vfio.o +vfio_core-y := vfio.o virqfd.o + +obj-$(CONFIG_VFIO) += vfio_core.o obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o obj-$(CONFIG_VFIO_SPAPR_EEH) += vfio_spapr_eeh.o diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index 9dd49c9..3f909bb 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -18,226 +18,13 @@ #include linux/eventfd.h #include linux/pci.h #include linux/file.h -#include linux/poll.h #include linux/vfio.h #include linux/wait.h -#include linux/workqueue.h #include linux/slab.h #include vfio_pci_private.h /* - * IRQfd - generic - */ -struct virqfd { - struct vfio_pci_device *vdev; - struct eventfd_ctx *eventfd; - int (*handler)(struct vfio_pci_device *, void *); - void(*thread)(struct vfio_pci_device *, void *); - void*data; - struct work_struct inject; - wait_queue_twait; - poll_table pt; - struct work_struct shutdown; - struct virqfd **pvirqfd; -}; - -static struct workqueue_struct *vfio_irqfd_cleanup_wq; - -int __init vfio_pci_virqfd_init(void) -{ - vfio_irqfd_cleanup_wq = - 
create_singlethread_workqueue(vfio-irqfd-cleanup); - if (!vfio_irqfd_cleanup_wq) - return -ENOMEM; - - return 0; -} - -void vfio_pci_virqfd_exit(void) -{ - destroy_workqueue(vfio_irqfd_cleanup_wq); -} - -static void virqfd_deactivate(struct virqfd *virqfd) -{ - queue_work(vfio_irqfd_cleanup_wq, virqfd-shutdown); -} - -static int virqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) -{ - struct virqfd *virqfd = container_of(wait, struct virqfd, wait); - unsigned long flags = (unsigned long)key; - - if (flags POLLIN) { - /* An event has been signaled, call function */ - if ((!virqfd-handler || -virqfd-handler(virqfd-vdev, virqfd-data)) - virqfd-thread) - schedule_work(virqfd-inject); - } - - if (flags POLLHUP) { - unsigned long flags; - spin_lock_irqsave(virqfd-vdev-irqlock, flags); - - /* -* The eventfd is closing, if the virqfd has not yet been -* queued for release, as determined by testing whether the -* vdev pointer to it is still valid, queue it now. As -* with kvm irqfds, we know we won't race against the virqfd -* going away because we hold wqh-lock to get here. 
-*/ - if (*(virqfd-pvirqfd) == virqfd) { - *(virqfd-pvirqfd) = NULL; - virqfd_deactivate(virqfd); - } - - spin_unlock_irqrestore(virqfd-vdev-irqlock, flags); - } - - return 0; -} - -static void virqfd_ptable_queue_proc(struct file *file, -wait_queue_head_t *wqh, poll_table *pt) -{ - struct virqfd *virqfd = container_of(pt, struct virqfd, pt); - add_wait_queue(wqh, virqfd-wait); -} - -static void virqfd_shutdown(struct work_struct *work) -{ - struct virqfd *virqfd = container_of(work, struct virqfd, shutdown); - u64 cnt; - - eventfd_ctx_remove_wait_queue(virqfd-eventfd, virqfd-wait, cnt); - flush_work(virqfd-inject); - eventfd_ctx_put(virqfd-eventfd); - - kfree(virqfd); -} - -static void virqfd_inject(struct work_struct *work) -{ - struct virqfd *virqfd = container_of(work, struct virqfd, inject); - if (virqfd-thread) - virqfd-thread(virqfd-vdev, virqfd-data); -} - -static int virqfd_enable(struct vfio_pci_device *vdev, -int (*handler)(struct vfio_pci_device *, void *), -void (*thread)(struct vfio_pci_device *, void
[PATCH v9 17/19] vfio: virqfd: add vfio_ prefix to virqfd_enable and virqfd_disable
The virqfd_enable and virqfd_disable functions are now global. Add the vfio_ prefix to those functions. Signed-off-by: Antonios Motakis a.mota...@virtualopensystems.com --- drivers/vfio/pci/vfio_pci_intrs.c | 18 +- drivers/vfio/virqfd.c | 14 +++--- include/linux/vfio.h | 10 +- 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index 6ca22a8..2aedff1 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -228,8 +228,8 @@ static int vfio_intx_set_signal(struct vfio_pci_device *vdev, int fd) static void vfio_intx_disable(struct vfio_pci_device *vdev) { vfio_intx_set_signal(vdev, -1); - virqfd_disable(vdev-ctx[0].unmask); - virqfd_disable(vdev-ctx[0].mask); + vfio_virqfd_disable(vdev-ctx[0].unmask); + vfio_virqfd_disable(vdev-ctx[0].mask); vdev-irq_type = VFIO_PCI_NUM_IRQS; vdev-num_ctx = 0; kfree(vdev-ctx); @@ -379,8 +379,8 @@ static void vfio_msi_disable(struct vfio_pci_device *vdev, bool msix) vfio_msi_set_block(vdev, 0, vdev-num_ctx, NULL, msix); for (i = 0; i vdev-num_ctx; i++) { - virqfd_disable(vdev-ctx[i].unmask); - virqfd_disable(vdev-ctx[i].mask); + vfio_virqfd_disable(vdev-ctx[i].unmask); + vfio_virqfd_disable(vdev-ctx[i].mask); } if (msix) { @@ -413,12 +413,12 @@ static int vfio_pci_set_intx_unmask(struct vfio_pci_device *vdev, } else if (flags VFIO_IRQ_SET_DATA_EVENTFD) { int32_t fd = *(int32_t *)data; if (fd = 0) - return virqfd_enable((void *) vdev, -vfio_pci_intx_unmask_handler, -vfio_send_intx_eventfd, NULL, -vdev-ctx[0].unmask, fd); + return vfio_virqfd_enable((void *) vdev, + vfio_pci_intx_unmask_handler, + vfio_send_intx_eventfd, NULL, + vdev-ctx[0].unmask, fd); - virqfd_disable(vdev-ctx[0].unmask); + vfio_virqfd_disable(vdev-ctx[0].unmask); } return 0; diff --git a/drivers/vfio/virqfd.c b/drivers/vfio/virqfd.c index ac63ec0..1794f07 100644 --- a/drivers/vfio/virqfd.c +++ b/drivers/vfio/virqfd.c @@ -101,10 +101,10 @@ static void 
virqfd_inject(struct work_struct *work) virqfd-thread(virqfd-opaque, virqfd-data); } -int virqfd_enable(void *opaque, - int (*handler)(void *, void *), - void (*thread)(void *, void *), - void *data, struct virqfd **pvirqfd, int fd) +int vfio_virqfd_enable(void *opaque, + int (*handler)(void *, void *), + void (*thread)(void *, void *), + void *data, struct virqfd **pvirqfd, int fd) { struct fd irqfd; struct eventfd_ctx *ctx; @@ -190,9 +190,9 @@ err_fd: return ret; } -EXPORT_SYMBOL_GPL(virqfd_enable); +EXPORT_SYMBOL_GPL(vfio_virqfd_enable); -void virqfd_disable(struct virqfd **pvirqfd) +void vfio_virqfd_disable(struct virqfd **pvirqfd) { unsigned long flags; @@ -212,4 +212,4 @@ void virqfd_disable(struct virqfd **pvirqfd) */ flush_workqueue(vfio_irqfd_cleanup_wq); } -EXPORT_SYMBOL_GPL(virqfd_disable); +EXPORT_SYMBOL_GPL(vfio_virqfd_disable); diff --git a/include/linux/vfio.h b/include/linux/vfio.h index ce23a42..378f320 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -142,10 +142,10 @@ struct virqfd { extern int vfio_pci_virqfd_init(void); extern void vfio_pci_virqfd_exit(void); -extern int virqfd_enable(void *opaque, -int (*handler)(void *, void *), -void (*thread)(void *, void *), -void *data, struct virqfd **pvirqfd, int fd); -extern void virqfd_disable(struct virqfd **pvirqfd); +extern int vfio_virqfd_enable(void *opaque, + int (*handler)(void *, void *), + void (*thread)(void *, void *), + void *data, struct virqfd **pvirqfd, int fd); +extern void vfio_virqfd_disable(struct virqfd **pvirqfd); #endif /* VFIO_H */ -- 2.1.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v9 13/19] vfio/platform: support for level sensitive interrupts
Level sensitive interrupts are exposed as maskable and automasked interrupts and are masked and disabled automatically when they fire. Signed-off-by: Antonios Motakis a.mota...@virtualopensystems.com --- drivers/vfio/platform/vfio_platform_irq.c | 102 +- drivers/vfio/platform/vfio_platform_private.h | 2 + 2 files changed, 100 insertions(+), 4 deletions(-) diff --git a/drivers/vfio/platform/vfio_platform_irq.c b/drivers/vfio/platform/vfio_platform_irq.c index 2ac8ed7..563abf6 100644 --- a/drivers/vfio/platform/vfio_platform_irq.c +++ b/drivers/vfio/platform/vfio_platform_irq.c @@ -31,18 +31,108 @@ #include vfio_platform_private.h +static void vfio_platform_mask(struct vfio_platform_irq *irq_ctx) +{ + unsigned long flags; + + spin_lock_irqsave(irq_ctx-lock, flags); + + if (!irq_ctx-masked) { + disable_irq(irq_ctx-hwirq); + irq_ctx-masked = true; + } + + spin_unlock_irqrestore(irq_ctx-lock, flags); +} + static int vfio_platform_set_irq_mask(struct vfio_platform_device *vdev, unsigned index, unsigned start, unsigned count, uint32_t flags, void *data) { - return -EINVAL; + if (start != 0 || count != 1) + return -EINVAL; + + if (!(vdev-irqs[index].flags VFIO_IRQ_INFO_MASKABLE)) + return -EINVAL; + + if (flags VFIO_IRQ_SET_DATA_EVENTFD) + return -EINVAL; /* not implemented yet */ + + if (flags VFIO_IRQ_SET_DATA_NONE) { + vfio_platform_mask(vdev-irqs[index]); + + } else if (flags VFIO_IRQ_SET_DATA_BOOL) { + uint8_t mask = *(uint8_t *)data; + + if (mask) + vfio_platform_mask(vdev-irqs[index]); + } + + return 0; +} + +static void vfio_platform_unmask(struct vfio_platform_irq *irq_ctx) +{ + unsigned long flags; + + spin_lock_irqsave(irq_ctx-lock, flags); + + if (irq_ctx-masked) { + enable_irq(irq_ctx-hwirq); + irq_ctx-masked = false; + } + + spin_unlock_irqrestore(irq_ctx-lock, flags); } static int vfio_platform_set_irq_unmask(struct vfio_platform_device *vdev, unsigned index, unsigned start, unsigned count, uint32_t flags, void *data) { - return -EINVAL; + if (start != 0 || 
count != 1) + return -EINVAL; + + if (!(vdev-irqs[index].flags VFIO_IRQ_INFO_MASKABLE)) + return -EINVAL; + + if (flags VFIO_IRQ_SET_DATA_EVENTFD) + return -EINVAL; /* not implemented yet */ + + if (flags VFIO_IRQ_SET_DATA_NONE) { + vfio_platform_unmask(vdev-irqs[index]); + + } else if (flags VFIO_IRQ_SET_DATA_BOOL) { + uint8_t unmask = *(uint8_t *)data; + + if (unmask) + vfio_platform_unmask(vdev-irqs[index]); + } + + return 0; +} + +static irqreturn_t vfio_maskable_irq_handler(int irq, void *dev_id) +{ + struct vfio_platform_irq *irq_ctx = dev_id; + unsigned long flags; + int ret = IRQ_NONE; + + spin_lock_irqsave(irq_ctx-lock, flags); + + if (!irq_ctx-masked) { + ret = IRQ_HANDLED; + + /* automask maskable interrupts */ + disable_irq_nosync(irq_ctx-hwirq); + irq_ctx-masked = true; + } + + spin_unlock_irqrestore(irq_ctx-lock, flags); + + if (ret == IRQ_HANDLED) + eventfd_signal(irq_ctx-trigger, 1); + + return ret; } static irqreturn_t vfio_irq_handler(int irq, void *dev_id) @@ -103,7 +193,7 @@ static int vfio_platform_set_irq_trigger(struct vfio_platform_device *vdev, irq_handler_t handler; if (vdev-irqs[index].flags VFIO_IRQ_INFO_MASKABLE) - return -EINVAL; /* not implemented */ + handler = vfio_maskable_irq_handler; else handler = vfio_irq_handler; @@ -175,13 +265,17 @@ int vfio_platform_irq_init(struct vfio_platform_device *vdev) if (hwirq 0) goto err; + spin_lock_init(vdev-irqs[i].lock); + vdev-irqs[i].flags = VFIO_IRQ_INFO_EVENTFD; if (irq_get_trigger_type(hwirq) IRQ_TYPE_LEVEL_MASK) - vdev-irqs[i].flags |= VFIO_IRQ_INFO_MASKABLE; + vdev-irqs[i].flags |= VFIO_IRQ_INFO_MASKABLE + | VFIO_IRQ_INFO_AUTOMASKED; vdev-irqs[i].count = 1; vdev-irqs[i].hwirq = hwirq; + vdev-irqs[i].masked = false; } vdev-num_irqs = cnt; diff --git a/drivers/vfio/platform/vfio_platform_private.h b/drivers/vfio/platform/vfio_platform_private.h index a3f2411..d25c4cd 100644 ---
[PATCH v9 02/19] vfio: platform: bind to devices on the platform bus
Driver to bind to Linux platform devices, and callbacks to discover their resources to be used by the main VFIO PLATFORM code. Signed-off-by: Antonios Motakis a.mota...@virtualopensystems.com --- drivers/vfio/platform/vfio_platform.c | 114 ++ include/uapi/linux/vfio.h | 1 + 2 files changed, 115 insertions(+) create mode 100644 drivers/vfio/platform/vfio_platform.c diff --git a/drivers/vfio/platform/vfio_platform.c b/drivers/vfio/platform/vfio_platform.c new file mode 100644 index 000..3885122 --- /dev/null +++ b/drivers/vfio/platform/vfio_platform.c @@ -0,0 +1,114 @@ +/* + * Copyright (C) 2013 - Virtual Open Systems + * Author: Antonios Motakis a.mota...@virtualopensystems.com + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include linux/device.h +#include linux/eventfd.h +#include linux/interrupt.h +#include linux/iommu.h +#include linux/module.h +#include linux/mutex.h +#include linux/notifier.h +#include linux/pm_runtime.h +#include linux/slab.h +#include linux/types.h +#include linux/uaccess.h +#include linux/vfio.h +#include linux/io.h +#include linux/platform_device.h +#include linux/irq.h + +#include vfio_platform_private.h + +#define DRIVER_VERSION 0.9 +#define DRIVER_AUTHOR Antonios Motakis a.mota...@virtualopensystems.com +#define DRIVER_DESC VFIO for platform devices - User Level meta-driver + +/* probing devices from the linux platform bus */ + +static struct resource *get_platform_resource(struct vfio_platform_device *vdev, + int num) +{ + struct platform_device *dev = (struct platform_device *) vdev-opaque; + int i; + + for (i = 0; i dev-num_resources; i++) { + struct resource *r = dev-resource[i]; + + if (resource_type(r) (IORESOURCE_MEM|IORESOURCE_IO)) { + if (!num) + return r; + + num--; + } + } + return NULL; +} + +static int get_platform_irq(struct vfio_platform_device *vdev, int i) +{ + struct platform_device *pdev = (struct platform_device *) vdev-opaque; + + return platform_get_irq(pdev, i); +} + +static int vfio_platform_probe(struct platform_device *pdev) +{ + struct vfio_platform_device *vdev; + int ret; + + vdev = kzalloc(sizeof(*vdev), GFP_KERNEL); + if (!vdev) + return -ENOMEM; + + vdev-opaque = (void *) pdev; + vdev-name = pdev-name; + vdev-flags = VFIO_DEVICE_FLAGS_PLATFORM; + vdev-get_resource = get_platform_resource; + vdev-get_irq = get_platform_irq; + + ret = vfio_platform_probe_common(vdev, pdev-dev); + if (ret) + kfree(vdev); + + return ret; +} + +static int vfio_platform_remove(struct platform_device *pdev) +{ + struct vfio_platform_device *vdev; + + vdev = vfio_platform_remove_common(pdev-dev); + if(vdev) { + kfree(vdev); + return 0; + } + + return -EINVAL; +} + +static struct platform_driver vfio_platform_driver = { + .probe = 
vfio_platform_probe, + .remove = vfio_platform_remove, + .driver = { + .name = vfio-platform, + .owner = THIS_MODULE, + }, +}; + +module_platform_driver(vfio_platform_driver); + +MODULE_VERSION(DRIVER_VERSION); +MODULE_LICENSE(GPL v2); +MODULE_AUTHOR(DRIVER_AUTHOR); +MODULE_DESCRIPTION(DRIVER_DESC); diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 06d66c9..9db1056 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -157,6 +157,7 @@ struct vfio_device_info { __u32 flags; #define VFIO_DEVICE_FLAGS_RESET(1 0)/* Device supports reset */ #define VFIO_DEVICE_FLAGS_PCI (1 1)/* vfio-pci device */ +#define VFIO_DEVICE_FLAGS_PLATFORM (1 2)/* vfio-platform device */ __u32 num_regions;/* Max region index + 1 */ __u32 num_irqs; /* Max IRQ index + 1 */ }; -- 2.1.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v9 15/19] vfio: add local lock in virqfd instead of depending on VFIO PCI
Virqfd just needs to keep accesses to any struct *virqfd safe, but this comes into play only when creating or destroying eventfds, so sharing the same spinlock with the VFIO bus driver is not necessary. Signed-off-by: Antonios Motakis a.mota...@virtualopensystems.com --- drivers/vfio/pci/vfio_pci_intrs.c | 10 +- drivers/vfio/virqfd.c | 24 +--- include/linux/vfio.h | 3 +-- 3 files changed, 19 insertions(+), 18 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index 3f909bb..e56c814 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -226,8 +226,8 @@ static int vfio_intx_set_signal(struct vfio_pci_device *vdev, int fd) static void vfio_intx_disable(struct vfio_pci_device *vdev) { vfio_intx_set_signal(vdev, -1); - virqfd_disable(vdev, vdev-ctx[0].unmask); - virqfd_disable(vdev, vdev-ctx[0].mask); + virqfd_disable(vdev-ctx[0].unmask); + virqfd_disable(vdev-ctx[0].mask); vdev-irq_type = VFIO_PCI_NUM_IRQS; vdev-num_ctx = 0; kfree(vdev-ctx); @@ -377,8 +377,8 @@ static void vfio_msi_disable(struct vfio_pci_device *vdev, bool msix) vfio_msi_set_block(vdev, 0, vdev-num_ctx, NULL, msix); for (i = 0; i vdev-num_ctx; i++) { - virqfd_disable(vdev, vdev-ctx[i].unmask); - virqfd_disable(vdev, vdev-ctx[i].mask); + virqfd_disable(vdev-ctx[i].unmask); + virqfd_disable(vdev-ctx[i].mask); } if (msix) { @@ -415,7 +415,7 @@ static int vfio_pci_set_intx_unmask(struct vfio_pci_device *vdev, vfio_send_intx_eventfd, NULL, vdev-ctx[0].unmask, fd); - virqfd_disable(vdev, vdev-ctx[0].unmask); + virqfd_disable(vdev-ctx[0].unmask); } return 0; diff --git a/drivers/vfio/virqfd.c b/drivers/vfio/virqfd.c index 243eb61..27fa2f0 100644 --- a/drivers/vfio/virqfd.c +++ b/drivers/vfio/virqfd.c @@ -17,6 +17,7 @@ #include pci/vfio_pci_private.h static struct workqueue_struct *vfio_irqfd_cleanup_wq; +static spinlock_t lock; int __init vfio_pci_virqfd_init(void) { @@ -25,6 +26,8 @@ int __init vfio_pci_virqfd_init(void) if 
(!vfio_irqfd_cleanup_wq) return -ENOMEM; + spin_lock_init(lock); + return 0; } @@ -53,21 +56,21 @@ static int virqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) if (flags POLLHUP) { unsigned long flags; - spin_lock_irqsave(virqfd-vdev-irqlock, flags); + spin_lock_irqsave(lock, flags); /* * The eventfd is closing, if the virqfd has not yet been * queued for release, as determined by testing whether the -* vdev pointer to it is still valid, queue it now. As +* virqfd pointer to it is still valid, queue it now. As * with kvm irqfds, we know we won't race against the virqfd -* going away because we hold wqh-lock to get here. +* going away because we hold the lock to get here. */ if (*(virqfd-pvirqfd) == virqfd) { *(virqfd-pvirqfd) = NULL; virqfd_deactivate(virqfd); } - spin_unlock_irqrestore(virqfd-vdev-irqlock, flags); + spin_unlock_irqrestore(lock, flags); } return 0; @@ -143,16 +146,16 @@ int virqfd_enable(struct vfio_pci_device *vdev, * we update the pointer to the virqfd under lock to avoid * pushing multiple jobs to release the same virqfd. */ - spin_lock_irq(vdev-irqlock); + spin_lock_irq(lock); if (*pvirqfd) { - spin_unlock_irq(vdev-irqlock); + spin_unlock_irq(lock); ret = -EBUSY; goto err_busy; } *pvirqfd = virqfd; - spin_unlock_irq(vdev-irqlock); + spin_unlock_irq(lock); /* * Install our own custom wake-up handling so we are notified via @@ -190,19 +193,18 @@ err_fd: } EXPORT_SYMBOL_GPL(virqfd_enable); -void virqfd_disable(struct vfio_pci_device *vdev, - struct virqfd **pvirqfd) +void virqfd_disable(struct virqfd **pvirqfd) { unsigned long flags; - spin_lock_irqsave(vdev-irqlock, flags); + spin_lock_irqsave(lock, flags); if (*pvirqfd) { virqfd_deactivate(*pvirqfd); *pvirqfd = NULL; } - spin_unlock_irqrestore(vdev-irqlock, flags); + spin_unlock_irqrestore(lock, flags); /* * Block until we know all outstanding shutdown jobs have completed. diff --git a/include/linux/vfio.h b/include/linux/vfio.h index
[PATCH v9 18/19] vfio: initialize the virqfd workqueue in VFIO generic code
Now we have finally completely decoupled virqfd from VFIO_PCI. We can initialize it from the VFIO generic code, in order to safely use it from multiple independent VFIO bus drivers. Signed-off-by: Antonios Motakis a.mota...@virtualopensystems.com --- drivers/vfio/pci/vfio_pci.c | 8 drivers/vfio/vfio.c | 8 drivers/vfio/virqfd.c | 4 ++-- include/linux/vfio.h| 4 ++-- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index f782533..40e176d 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -1034,7 +1034,6 @@ static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev) static void __exit vfio_pci_cleanup(void) { pci_unregister_driver(vfio_pci_driver); - vfio_pci_virqfd_exit(); vfio_pci_uninit_perm_bits(); } @@ -1047,11 +1046,6 @@ static int __init vfio_pci_init(void) if (ret) return ret; - /* Start the virqfd cleanup handler */ - ret = vfio_pci_virqfd_init(); - if (ret) - goto out_virqfd; - /* Register and scan for devices */ ret = pci_register_driver(vfio_pci_driver); if (ret) @@ -1060,8 +1054,6 @@ static int __init vfio_pci_init(void) return 0; out_driver: - vfio_pci_virqfd_exit(); -out_virqfd: vfio_pci_uninit_perm_bits(); return ret; } diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index f018d8d..8e84471 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -1464,6 +1464,11 @@ static int __init vfio_init(void) if (ret) goto err_cdev_add; + /* Start the virqfd cleanup handler used by some VFIO bus drivers */ + ret = vfio_virqfd_init(); + if (ret) + goto err_virqfd; + pr_info(DRIVER_DESC version: DRIVER_VERSION \n); /* @@ -1476,6 +1481,8 @@ static int __init vfio_init(void) return 0; +err_virqfd: + cdev_del(vfio.group_cdev); err_cdev_add: unregister_chrdev_region(vfio.group_devt, MINORMASK); err_alloc_chrdev: @@ -1490,6 +1497,7 @@ static void __exit vfio_cleanup(void) { WARN_ON(!list_empty(vfio.group_list)); + vfio_virqfd_exit(); 
idr_destroy(vfio.group_idr); cdev_del(vfio.group_cdev); unregister_chrdev_region(vfio.group_devt, MINORMASK); diff --git a/drivers/vfio/virqfd.c b/drivers/vfio/virqfd.c index 1794f07..7e4c23b 100644 --- a/drivers/vfio/virqfd.c +++ b/drivers/vfio/virqfd.c @@ -18,7 +18,7 @@ static struct workqueue_struct *vfio_irqfd_cleanup_wq; static spinlock_t lock; -int __init vfio_pci_virqfd_init(void) +int __init vfio_virqfd_init(void) { vfio_irqfd_cleanup_wq = create_singlethread_workqueue(vfio-irqfd-cleanup); @@ -30,7 +30,7 @@ int __init vfio_pci_virqfd_init(void) return 0; } -void vfio_pci_virqfd_exit(void) +void vfio_virqfd_exit(void) { destroy_workqueue(vfio_irqfd_cleanup_wq); } diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 378f320..2fb2e30 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -140,8 +140,8 @@ struct virqfd { struct virqfd **pvirqfd; }; -extern int vfio_pci_virqfd_init(void); -extern void vfio_pci_virqfd_exit(void); +extern int vfio_virqfd_init(void); +extern void vfio_virqfd_exit(void); extern int vfio_virqfd_enable(void *opaque, int (*handler)(void *, void *), void (*thread)(void *, void *), -- 2.1.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH linux-next] iommu: add iommu for s390 platform
On Mon, 27 Oct 2014 18:58:35 +0100 Joerg Roedel j...@8bytes.org wrote: On Mon, Oct 27, 2014 at 06:02:19PM +0100, Gerald Schaefer wrote: On Mon, 27 Oct 2014 17:25:02 +0100 Joerg Roedel j...@8bytes.org wrote: Is there some hardware reason for this or is that just an implementation detail that can be changed. In other words, does the hardware allow to use the same DMA table for multiple devices? Yes, the HW would allow shared DMA tables, but the implementation would need some non-trivial changes. For example, we have a per-device spin_lock for DMA table manipulations and the code in arch/s390/pci/pci_dma.c knows nothing about IOMMU domains or shared DMA tables, it just implements a set of dma_map_ops. I think it would make sense to move the DMA table handling code and the dma_map_ops implementation to the IOMMU driver too. This is also how some other IOMMU drivers implement it. Yes, I feared that this would come up, but I agree that it looks like the best solution, at least if we really want/need the IOMMU API for s390 now. I'll need to discuss this with Frank, he seems to be on vacation this week. Thanks for your feedback and explanations! The plan is to consolidate the dma_ops implementations someday and have a common implementation that works with all IOMMU drivers across architectures. This would benefit s390 as well and obsoletes the driver specific dma_ops implementation. Of course this would also go horribly wrong if a device was already in use (via the current dma_map_ops), but I guess using devices through the IOMMU_API prevents using them otherwise? This is taken care of by the device drivers. A driver for a device either uses the DMA-API or does its own management of DMA mappings using the IOMMU-API. VFIO is an example for the later case. I think it is much easier to use the same DMA table for all devices in a domain, if the hardware allows that. 
Yes, in this case, having one DMA table per domain and sharing it between all devices in that domain sounds like a good idea. However, I can't think of any use case for this, and Frank probably had a very special use case in mind where this scenario doesn't appear, hence the one device per domain restriction. One use case is device access from user-space via VFIO. A userspace process might want to access multiple devices at the same time and VFIO would implement this by assigning all of these devices to the same IOMMU domain. This requirement also comes from the IOMMU-API itself. The intention of the API is to make different IOMMUs look the same through the API, and this is violated when drivers implement a 1-1 domain-device mapping. So, if having multiple devices per domain is a must, then we probably need a thorough rewrite of the arch/s390/pci/pci_dma.c code. Yes, this is a requirement for new IOMMU drivers. We already have drivers implementing the same 1-1 relation and we are about to fix them. But I don't want to add new drivers doing the same. Joerg -- To unsubscribe from this list: send the line "unsubscribe linux-s390" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v9 14/19] vfio: move eventfd support code for VFIO_PCI to a separate file
Hi Antonios, On Mon, Oct 27, 2014 at 12:07 PM, Antonios Motakis <a.mota...@virtualopensystems.com> wrote: The virqfd functionality that is used by VFIO_PCI to implement interrupt masking and unmasking via an eventfd, is generic enough and can be reused by another driver. Move it to a separate file in order to allow the code to be shared. Also properly export virqfd_enable and virqfd_disable in the process. Alex will handle this, not me, but my personal preference is to avoid doing things "in the process" because the small changes get lost in the big patch. I'd rather see a strict move that changes no code at all (except things like necessary Makefile changes), followed by a smaller patch that does the additional stuff. Does "properly export" mean that those functions were previously *improperly* exported and the way they used to be exported caused a problem? Or does it just mean "export"? Bjorn Signed-off-by: Antonios Motakis <a.mota...@virtualopensystems.com> --- drivers/vfio/Makefile | 4 +- drivers/vfio/pci/vfio_pci_intrs.c | 213 --- drivers/vfio/pci/vfio_pci_private.h | 3 - drivers/vfio/virqfd.c | 214 include/linux/vfio.h| 28 + 5 files changed, 245 insertions(+), 217 deletions(-) create mode 100644 drivers/vfio/virqfd.c ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v9 17/19] vfio: virqfd: add vfio_ prefix to virqfd_enable and virqfd_disable
On Mon, Oct 27, 2014 at 12:08 PM, Antonios Motakis a.mota...@virtualopensystems.com wrote: The virqfd_enable and virqfd_disable functions are now global. Add the vfio_ prefix to those functions. Wouldn't it be better to change the name *before* making them global? Bjorn ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu