Re: [PATCH v4 0/2] iommu: Add iommu_error class event to iommu trace
On Tue, Sep 24, 2013 at 03:21:18PM -0600, Shuah Khan wrote: Shuah Khan (2): iommu: Add iommu_error class event to iommu trace iommu: Change iommu driver to call io_page_fault trace event Applied to the tracing branch, thanks Shuah. ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 4/7] iommu/arm-smmu: Check for num_context_irqs > 0 to avoid divide by zero exception
On Tue, Sep 24, 2013 at 11:40:48AM -0400, Will Deacon wrote: On Tue, Sep 24, 2013 at 04:06:58PM +0100, Andreas Herrmann wrote: With the right (or wrong;-) definition of v1 SMMU node in DTB it is possible to trigger a division by zero in arm_smmu_init_domain_context (if number of context irqs is 0): if (smmu-version == 1) { root_cfg-irptndx = atomic_inc_return(smmu-irptndx); → root_cfg-irptndx %= smmu-num_context_irqs; } else { Avoid this by checking for num_context_irqs 0 before trying to assign interrupts to contexts. Signed-off-by: Andreas Herrmann andreas.herrm...@calxeda.com --- drivers/iommu/arm-smmu.c | 31 +-- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index f5a856e..0dfd255 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -828,21 +828,24 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, return ret; root_cfg-cbndx = ret; - if (smmu-version == 1) { - root_cfg-irptndx = atomic_inc_return(smmu-irptndx); - root_cfg-irptndx %= smmu-num_context_irqs; - } else { - root_cfg-irptndx = root_cfg-cbndx; - } - irq = smmu-irqs[smmu-num_global_irqs + root_cfg-irptndx]; - ret = request_irq(irq, arm_smmu_context_fault, IRQF_SHARED, - arm-smmu-context-fault, domain); - if (IS_ERR_VALUE(ret)) { - dev_err(smmu-dev, failed to request context IRQ %d (%u)\n, - root_cfg-irptndx, irq); - root_cfg-irptndx = -1; - goto out_free_context; + if (smmu-num_context_irqs) { Can we move this check to probe time, to avoid re-evaluating it every time we initialise a new domain? Yes, I'll move this check and issue an error message when there is not at least one context interrupt available. Andreas ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 3/3] Fix compile error in drivers/gpu/drm/msm/msm_drv.c with IOMMU disabled
The function msm_iommu_get_ctx() is needed by the MSM-GPU driver with and without IOMMU compiled in. Make the function available when no IOMMU driver is there. Signed-off-by: Joerg Roedel j...@8bytes.org --- drivers/iommu/msm_iommu.h |7 +++ 1 file changed, 7 insertions(+) diff --git a/drivers/iommu/msm_iommu.h b/drivers/iommu/msm_iommu.h index 5c7c955..da53558 100644 --- a/drivers/iommu/msm_iommu.h +++ b/drivers/iommu/msm_iommu.h @@ -108,7 +108,14 @@ struct msm_iommu_ctx_drvdata { * Useful for testing and drivers that do not yet fully have IOMMU stuff in * their platform devices. */ +#ifdef CONFIG_MSM_IOMMU struct device *msm_iommu_get_ctx(const char *ctx_name); +#else +static inline struct device *msm_iommu_get_ctx(const char *ctx_name) +{ + return NULL; +} +#endif /* * Interrupt handler for the IOMMU context fault interrupt. Hooking the -- 1.7.9.5 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 1/3] MSM: Remove iommu include from drivers/gpu/drm/msm/mdp4/mdp4_kms.c
The include file has been removed and the file does not need it anyway, so remove it. Fixes a compile error. Signed-off-by: Joerg Roedel j...@8bytes.org --- drivers/gpu/drm/msm/mdp4/mdp4_kms.c |2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/msm/mdp4/mdp4_kms.c b/drivers/gpu/drm/msm/mdp4/mdp4_kms.c index 5db5bba..bc7fd11 100644 --- a/drivers/gpu/drm/msm/mdp4/mdp4_kms.c +++ b/drivers/gpu/drm/msm/mdp4/mdp4_kms.c @@ -19,8 +19,6 @@ #include msm_drv.h #include mdp4_kms.h -#include mach/iommu.h - static struct mdp4_platform_config *mdp4_get_config(struct platform_device *dev); static int mdp4_hw_init(struct msm_kms *kms) -- 1.7.9.5 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 6/7] iommu/arm-smmu: Add module parameter arm-smmu=off|force_isolation
On Tue, Sep 24, 2013 at 11:42:18AM -0400, Will Deacon wrote: On Tue, Sep 24, 2013 at 04:07:00PM +0100, Andreas Herrmann wrote: arm-smmu= arm-smmu driver option off Disable arm-smmu driver (ie. ignore available SMMUs) force_isolation Try to attach each master device on every SMMU to a separate iommu_domain. Default is that driver detects SMMUs but no translation is configured (transactions just bypass translation process). Signed-off-by: Andreas Herrmann andreas.herrm...@calxeda.com --- drivers/iommu/arm-smmu.c | 26 ++ 1 file changed, 26 insertions(+) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 3eb2259..251564e 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -399,6 +399,9 @@ struct arm_smmu_domain { static DEFINE_SPINLOCK(arm_smmu_devices_lock); static LIST_HEAD(arm_smmu_devices); +static bool arm_smmu_disabled; +static bool arm_smmu_force_isolation; + static struct arm_smmu_master *find_smmu_master(struct arm_smmu_device *smmu, struct device_node *dev_node) { @@ -1837,6 +1840,9 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev) struct of_phandle_args masterspec; int num_irqs, i, err; + if (arm_smmu_disabled) + return -ENODEV; + smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL); if (!smmu) { dev_err(dev, failed to allocate arm_smmu_device\n); @@ -2022,6 +2028,23 @@ static struct platform_driver arm_smmu_driver = { .remove = arm_smmu_device_remove, }; +static int __init arm_smmu_parse_options(char *str) +{ + if (*str) { + str++; + if (!strncmp(str, off, 3)) + arm_smmu_disabled = true; + else if(!strncmp(str, force_isolation, 15)) + arm_smmu_force_isolation = true; + else { + pr_warn(arm_smmu: invalid parameter (\%s\)\n, str); + return 0; + } + } + return 1; +} +__setup(arm-smmu, arm_smmu_parse_options); If this is going to be a common function for IOMMUs, let's instead move the command-line parsing out into the generic IOMMU layer, then have an optional callback into the low-level IOMMU driver for 
enabling/disabling it. Makes sense and I am currently looking into it. Andreas ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH] On unmap, flush IOMMU TLB and return correct size
On Sat, Sep 21, 2013 at 09:59:00PM -0500, David Woodhouse wrote: I *hate* the bizarre calling convention for iommu_unmap(). Is it actually clearly documented anywhere? Why on earth is it not just returning void, and expected to unmap what it was *asked* to unmap? Yeah, I agree that this should be documented since it is quite non-standard/non-obvious behaviour of a function. The reason the interface was implemented this way is that the caller should not need to know (or keep track of) the page-size which was used to map a given iova. So the interface is basically, that you give an iova and a size and the iommu driver unmaps _at_least_ a region of that size. But if you ask for a 4k region which is mapped by a 2M page then the whole 2M are unmapped. The return value tells you how much was actually unmapped. Not the best interface, I know. We should come up with a better way to handle this. Joerg ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 6/7] iommu/arm-smmu: Add module parameter arm-smmu=off|force_isolation
On Tue, Sep 24, 2013 at 04:42:18PM +0100, Will Deacon wrote: On Tue, Sep 24, 2013 at 04:07:00PM +0100, Andreas Herrmann wrote: +__setup(arm-smmu, arm_smmu_parse_options); If this is going to be a common function for IOMMUs, let's instead move the command-line parsing out into the generic IOMMU layer, then have an optional callback into the low-level IOMMU driver for enabling/disabling it. Hmm, actually the force-isolation parameters that different IOMMU drivers implement are for their version of the DMA-API which is not yet available in the generic IOMMU layer. Unless this changes I think it is a good idea to keep these parameters close to the actual DMA-API implementation used and not move them to the IOMMU layer. Joerg ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH] On unmap, flush IOMMU TLB and return correct size
On Wed, 2013-09-25 at 17:54 +0200, Joerg Roedel wrote: On Sat, Sep 21, 2013 at 09:59:00PM -0500, David Woodhouse wrote: I *hate* the bizarre calling convention for iommu_unmap(). Is it actually clearly documented anywhere? Why on earth is it not just returning void, and expected to unmap what it was *asked* to unmap? Yeah, I agree that this should be documented since it is quite non-standard/non-obvious behaviour of a function. The reason the interface was implemented this way is that the caller should not need to know (or keep track of) the page-size which was used to map a given iova. So the interface is basically, that you give an iova and a size and the iommu driver unmaps _at_least_ a region of that size. But if you ask for a 4k region which is mapped by a 2M page then the whole 2M are unmapped. The return value tells you how much was actually unmapped. Not the best interface, I know. We should come up with a better way to handle this. Actually, unmap can return zero too, which happens if you try to unmap an offset into a super page mapping. The current interface has issues, but both legacy kvm assignment and vfio rely on it so we don't need to track individual mappings. Thanks, Alex ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 5/7] iommu: suppress loff_t compilation error on powerpc
On Thu, 2013-09-19 at 12:59 +0530, Bharat Bhushan wrote: Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com --- drivers/vfio/pci/vfio_pci_rdwr.c |3 ++- 1 files changed, 2 insertions(+), 1 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c index 210db24..8a8156a 100644 --- a/drivers/vfio/pci/vfio_pci_rdwr.c +++ b/drivers/vfio/pci/vfio_pci_rdwr.c @@ -181,7 +181,8 @@ ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf, size_t count, loff_t *ppos, bool iswrite) { int ret; - loff_t off, pos = *ppos & VFIO_PCI_OFFSET_MASK; + loff_t off; + u64 pos = (u64 )(*ppos & VFIO_PCI_OFFSET_MASK); void __iomem *iomem = NULL; unsigned int rsrc; bool is_ioport; What's the compile error that this fixes? ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 2/7] iommu: add api to get iommu_domain of a device
On Thu, 2013-09-19 at 12:59 +0530, Bharat Bhushan wrote: This api return the iommu domain to which the device is attached. The iommu_domain is required for making API calls related to iommu. Follow up patches which use this API to know iommu maping. Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com --- drivers/iommu/iommu.c | 10 ++ include/linux/iommu.h |7 +++ 2 files changed, 17 insertions(+), 0 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index fbe9ca7..6ac5f50 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -696,6 +696,16 @@ void iommu_detach_device(struct iommu_domain *domain, struct device *dev) } EXPORT_SYMBOL_GPL(iommu_detach_device); +struct iommu_domain *iommu_get_dev_domain(struct device *dev) +{ + struct iommu_ops *ops = dev-bus-iommu_ops; + + if (unlikely(ops == NULL || ops-get_dev_iommu_domain == NULL)) + return NULL; + + return ops-get_dev_iommu_domain(dev); +} +EXPORT_SYMBOL_GPL(iommu_get_dev_domain); What prevents this from racing iommu_domain_free()? There's no references acquired, so there's no reason for the caller to assume the pointer is valid. /* * IOMMU groups are really the natrual working unit of the IOMMU, but * the IOMMU API works on domains and devices. 
Bridge that gap by diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 7ea319e..fa046bd 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -127,6 +127,7 @@ struct iommu_ops { int (*domain_set_windows)(struct iommu_domain *domain, u32 w_count); /* Get the numer of window per domain */ u32 (*domain_get_windows)(struct iommu_domain *domain); + struct iommu_domain *(*get_dev_iommu_domain)(struct device *dev); unsigned long pgsize_bitmap; }; @@ -190,6 +191,7 @@ extern int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr, phys_addr_t offset, u64 size, int prot); extern void iommu_domain_window_disable(struct iommu_domain *domain, u32 wnd_nr); +extern struct iommu_domain *iommu_get_dev_domain(struct device *dev); /** * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework * @domain: the iommu domain where the fault has happened @@ -284,6 +286,11 @@ static inline void iommu_domain_window_disable(struct iommu_domain *domain, { } +static inline struct iommu_domain *iommu_get_dev_domain(struct device *dev) +{ + return NULL; +} + static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) { return 0; ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 7/7] iommu/arm-smmu: Clear global and context bank fault status and syndrome registers
On Tue, Sep 24, 2013 at 07:32:47PM +0100, Andreas Herrmann wrote: On Tue, Sep 24, 2013 at 11:42:52AM -0400, Will Deacon wrote: On Tue, Sep 24, 2013 at 04:07:01PM +0100, Andreas Herrmann wrote: Signed-off-by: Andreas Herrmann andreas.herrm...@calxeda.com --- drivers/iommu/arm-smmu.c |9 + 1 file changed, 9 insertions(+) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 251564e..a499146 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -645,6 +645,10 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain) stage1 = root_cfg-cbar != CBAR_TYPE_S2_TRANS; cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, root_cfg-cbndx); + /* clear fsr */ + writel_relaxed(0x, cb_base + ARM_SMMU_CB_FSR); + writel_relaxed(0, cb_base + ARM_SMMU_CB_FSYNR0); + /* CBAR */ reg = root_cfg-cbar; if (smmu-version == 1) @@ -1570,6 +1574,11 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu) int i = 0; u32 scr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sCR0); + /* clear global FSRs */ + writel(0x, gr0_base + ARM_SMMU_GR0_sGFSR); + writel(0, gr0_base + ARM_SMMU_GR0_sGFSYNR0); + writel(0, gr0_base + ARM_SMMU_GR0_sGFSYNR1); Why do you need this? According to the spec the status and syndrome registers have unknown/unpredictable reset values. So better set known values before we start to use these registers (ie. handle faults where we read them). No? Sure, but the only time these are made visible should be if we take a fault, in which case the registers should contain something meaningful. I guess I could see a problem if there is a shared interrupt line for faults though, so a cut-down version of your patch (just clearing ARM_SMMU_GR0_sGFSR and the ARM_SMMU_CB_FSR registers) should be enough. Cheers, Will ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH] On unmap, flush IOMMU TLB and return correct size
On Wed, 2013-09-25 at 17:54 +0200, Joerg Roedel wrote: On Sat, Sep 21, 2013 at 09:59:00PM -0500, David Woodhouse wrote: I *hate* the bizarre calling convention for iommu_unmap(). Is it actually clearly documented anywhere? Why on earth is it not just returning void, and expected to unmap what it was *asked* to unmap? Yeah, I agree that this should be documented since it is quite non-standard/non-obvious behaviour of a function. The reason the interface was implemented this way is that the caller should not need to know (or keep track of) the page-size which was used to map a given iova. Why would it ever care? If it *happens* to map something that can use large pages, yay!. If it subsequently breaks apart those large pages by unmapping 4KiB in the middle, let the IOMMU driver break that apart. And why are we not allowed to unmap more than a single page at a time? Doing unmaps in small pieces (with a full flush between every one) is *horribly* slow. So the interface is basically, that you give an iova and a size and the iommu driver unmaps _at_least_ a region of that size. But if you ask for a 4k region which is mapped by a 2M page then the whole 2M are unmapped. The return value tells you how much was actually unmapped. Not the best interface, I know. We should come up with a better way to handle this. Seriously, just the address and the size. Let the IOMMU code deal with whether it can *actually* use large pages on the particular set of IOMMUs that are in use for the devices you've added to the domain so far. -- dwmw2 smime.p7s Description: S/MIME cryptographic signature ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 2/7] iommu/arm-smmu: Calculate SMMU_CB_BASE from smmu register values
On Tue, Sep 24, 2013 at 07:07:20PM +0100, Andreas Herrmann wrote: On Tue, Sep 24, 2013 at 11:34:57AM -0400, Will Deacon wrote: On Tue, Sep 24, 2013 at 04:06:56PM +0100, Andreas Herrmann wrote: Currently it is derived from smmu resource size. If the resource size is wrongly specified (e.g. too large) this leads to a miscalculation and can cause undefined behaviour when context bank registers are modified. [...] Hmm, this is a tricky one. We know that we have an inconsistency (i.e. the DT and the hardware don't agree on the size of the device) but we warn and attempt to continue with the value from the DT. I don't think that trusting the hardware is the right thing to do in this case, since it's not possible to change so we should let the DT act as an override. In other words: if the device tree is wrong, go fix it. Yes, I've found this issue with a wrong DT. With the original code there was some weirdness when setting certain context bank registers. (Identifying the root cause was not straight forward.) I think it's somehow odd not to trust the hardware values in the first place and to add (right from the beginning) a quirk for potential implementation bugs. Are there already implementations that use wrong register values that are required to determine the partitioning of the SMMU address space? I don't know of any, but you can bet that people will want to run old kernels on future hardware, so we should try and get this right from day one. If there is a mismatch it's hard to say which value is the correct one. I think there are three options: (1) just print a warning about the mismatch (2) print a warning + override based on DT (3) print a warning + override based on DT + have an option to switch off the override So, what's your choice? I had gone for (2), on the assumption that fixing a broken DT shouldn't be too hard as well as allowing people to work around broken hardware. 
Yes, it means we treat the DT as golden, but that's already the case in the absence of fully probeable hardware. Will ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH] On unmap, flush IOMMU TLB and return correct size
On Wed, Sep 25, 2013 at 05:05:13PM +0100, David Woodhouse wrote: On Wed, 2013-09-25 at 17:54 +0200, Joerg Roedel wrote: Yeah, I agree that this should be documented since it is quite non-standard/non-obvious behaviour of a function. The reason the interface was implemented this way is that the caller should not need to know (or keep track of) the page-size which was used to map a given iova. Why would it ever care? If it *happens* to map something that can use large pages, yay!. If it subsequently breaks apart those large pages by unmapping 4KiB in the middle, let the IOMMU driver break that apart. All that was implemented back in the old days where KVM was the only user of that code. And back then it didn't make sense for the IOMMU driver to split huge-pages because in the next step KVM would unmap all the small pages anyway. So I took the other route and just told KVM how much was unmapped so that the unmap code could skip over it. And why are we not allowed to unmap more than a single page at a time? The user of iommu_unmap is actually allowed to do that. But the page-size splitting it done in the iommu_unmap() function already to have that code common between different IOMMU drivers. In the end the IOMMU driver only exports the page-size bitmap and is fine. Doing unmaps in small pieces (with a full flush between every one) is *horribly* slow. I proposed an iommu_commit() function to move all the necessary flushes there after a couple of page-table changes. And this also allows to keep the generic page-size splitting code generic. Seriously, just the address and the size. Let the IOMMU code deal with whether it can *actually* use large pages on the particular set of IOMMUs that are in use for the devices you've added to the domain so far. Actually we can get rid of the unmapped_size thing by putting the same constraints the DMA-API has in place. The user of the API has to unmap a region with the same iova,size parameters used for mapping it. 
Then we can guarantee that only the requested range will be unmapped. Joerg ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH] On unmap, flush IOMMU TLB and return correct size
On Wed, 2013-09-25 at 17:05 +0100, David Woodhouse wrote: On Wed, 2013-09-25 at 17:54 +0200, Joerg Roedel wrote: On Sat, Sep 21, 2013 at 09:59:00PM -0500, David Woodhouse wrote: I *hate* the bizarre calling convention for iommu_unmap(). Is it actually clearly documented anywhere? Why on earth is it not just returning void, and expected to unmap what it was *asked* to unmap? Yeah, I agree that this should be documented since it is quite non-standard/non-obvious behaviour of a function. The reason the interface was implemented this way is that the caller should not need to know (or keep track of) the page-size which was used to map a given iova. Why would it ever care? If it *happens* to map something that can use large pages, yay!. If it subsequently breaks apart those large pages by unmapping 4KiB in the middle, let the IOMMU driver break that apart. Can this be done atomically? I thought part of the reason for this interface was that iommu drivers typically couldn't replace a huge page with multiple smaller pages in the presence of DMA. And why are we not allowed to unmap more than a single page at a time? Doing unmaps in small pieces (with a full flush between every one) is *horribly* slow. While we call iommu_unmap with PAGE_SIZE, we certainly expect that larger chunks will be used if they were mapped that way. So the interface is basically, that you give an iova and a size and the iommu driver unmaps _at_least_ a region of that size. But if you ask for a 4k region which is mapped by a 2M page then the whole 2M are unmapped. The return value tells you how much was actually unmapped. Not the best interface, I know. We should come up with a better way to handle this. Seriously, just the address and the size. Let the IOMMU code deal with whether it can *actually* use large pages on the particular set of IOMMUs that are in use for the devices you've added to the domain so far. You're contradicting yourself here. 
Just let the iommu deal with it, but now you're raising concerns that Intel may be mixing super page IOMMU hardware with non-super page IOMMU hardware on the same box, so I do need to be concerned, yet there's no interface to discover super page support. What happens if my IOMMU domain makes use of super pages and then I add a new device behind a new IOMMU without hardware super page support? We have the same problem with snoop control already. There's no atomic re-mapping interface, so KVM just pretends it can unmap and remap without anyone noticing. Thanks, Alex ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH] On unmap, flush IOMMU TLB and return correct size
On Wed, 2013-09-25 at 11:36 -0600, Alex Williamson wrote: On Wed, 2013-09-25 at 17:05 +0100, David Woodhouse wrote: Why would it ever care? If it *happens* to map something that can use large pages, yay!. If it subsequently breaks apart those large pages by unmapping 4KiB in the middle, let the IOMMU driver break that apart. Can this be done atomically? I thought part of the reason for this interface was that iommu drivers typically couldn't replace a huge page with multiple smaller pages in the presence of DMA. For the Intel IOMMU it can. You can atomically change from a large page entry, to a pointer to a full set of smaller page tables. Do the IOTLB flush, and at no time is there an interruption in service. Not sure if this is true for *all* IOMMU hardware; I'd be perfectly happy to accept a variant of Jörg's proposal that we should only ever unmap exactly the same range that we mapped. Except we should allow the unmapping of adjacent regions together; just not a partial unmap of something that was mapped in one go. Seriously, just the address and the size. Let the IOMMU code deal with whether it can *actually* use large pages on the particular set of IOMMUs that are in use for the devices you've added to the domain so far. You're contradicting yourself here. Just let the iommu deal with it, but now you're raising concerns that Intel may be mixing super page IOMMU hardware with non-super page IOMMU hardware on the same box, so I do need to be concerned, yet there's no interface to discover super page support. No contradiction. Do Not Concern Yourself. Just tell the IOMMU what you want mapped, and where. Let *it* worry about whether it can use superpages or not. Like it already *does*. What happens if my IOMMU domain makes use of super pages and then I add a new device behind a new IOMMU without hardware super page support? Currently, you end up with the domain happily including superpages, and the less capable IOMMU that you added later won't cope. 
What we probably *ought* to do is walk the page tables and convert any pre-existing superpages to small pages, at the time we add the non-SP-capable IOMMU. FWIW we currently screw up the handling of cache-coherent vs. non-coherent page tables too. That one wants a wbinvd somewhere when we add a non-coherent IOMMU to the domain. -- dwmw2 smime.p7s Description: S/MIME cryptographic signature ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 7/7] vfio pci: Add vfio iommu implementation for FSL_PAMU
On Thu, 2013-09-19 at 12:59 +0530, Bharat Bhushan wrote: This patch adds vfio iommu support for Freescale IOMMU (PAMU - Peripheral Access Management Unit). The Freescale PAMU is an aperture-based IOMMU with the following characteristics. Each device has an entry in a table in memory describing the iova-phys mapping. The mapping has: -an overall aperture that is power of 2 sized, and has a start iova that is naturally aligned -has 1 or more windows within the aperture -number of windows must be power of 2, max is 256 -size of each window is determined by aperture size / # of windows -iova of each window is determined by aperture start iova / # of windows -the mapped region in each window can be different than the window size...mapping must power of 2 -physical address of the mapping must be naturally aligned with the mapping size Some of the code is derived from TYPE1 iommu (driver/vfio/vfio_iommu_type1.c). Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com --- drivers/vfio/Kconfig |6 + drivers/vfio/Makefile |1 + drivers/vfio/vfio_iommu_fsl_pamu.c | 952 include/uapi/linux/vfio.h | 100 4 files changed, 1059 insertions(+), 0 deletions(-) create mode 100644 drivers/vfio/vfio_iommu_fsl_pamu.c diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index 26b3d9d..7d1da26 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig @@ -8,11 +8,17 @@ config VFIO_IOMMU_SPAPR_TCE depends on VFIO SPAPR_TCE_IOMMU default n +config VFIO_IOMMU_FSL_PAMU + tristate + depends on VFIO + default n + menuconfig VFIO tristate VFIO Non-Privileged userspace driver framework depends on IOMMU_API select VFIO_IOMMU_TYPE1 if X86 select VFIO_IOMMU_SPAPR_TCE if (PPC_POWERNV || PPC_PSERIES) + select VFIO_IOMMU_FSL_PAMU if FSL_PAMU help VFIO provides a framework for secure userspace device drivers. See Documentation/vfio.txt for more details. 
diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile index c5792ec..7461350 100644 --- a/drivers/vfio/Makefile +++ b/drivers/vfio/Makefile @@ -1,4 +1,5 @@ obj-$(CONFIG_VFIO) += vfio.o obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_common.o vfio_iommu_type1.o obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_common.o vfio_iommu_spapr_tce.o +obj-$(CONFIG_VFIO_IOMMU_FSL_PAMU) += vfio_iommu_common.o vfio_iommu_fsl_pamu.o obj-$(CONFIG_VFIO_PCI) += pci/ diff --git a/drivers/vfio/vfio_iommu_fsl_pamu.c b/drivers/vfio/vfio_iommu_fsl_pamu.c new file mode 100644 index 000..b29365f --- /dev/null +++ b/drivers/vfio/vfio_iommu_fsl_pamu.c @@ -0,0 +1,952 @@ +/* + * VFIO: IOMMU DMA mapping support for FSL PAMU IOMMU + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright (C) 2013 Freescale Semiconductor, Inc. + * + * Author: Bharat Bhushan bharat.bhus...@freescale.com + * + * This file is derived from driver/vfio/vfio_iommu_type1.c + * + * The Freescale PAMU is an aperture-based IOMMU with the following + * characteristics. Each device has an entry in a table in memory + * describing the iova-phys mapping. 
The mapping has: + * -an overall aperture that is power of 2 sized, and has a start iova that + * is naturally aligned + * -has 1 or more windows within the aperture + * -number of windows must be power of 2, max is 256 + * -size of each window is determined by aperture size / # of windows + * -iova of each window is determined by aperture start iova / # of windows + * -the mapped region in each window can be different than + * the window size...mapping must power of 2 + * -physical address of the mapping must be naturally aligned + * with the mapping size + */ + +#include linux/compat.h +#include linux/device.h +#include linux/fs.h +#include linux/iommu.h +#include linux/module.h +#include linux/mm.h +#include linux/pci.h /* pci_bus_type */ +#include linux/sched.h +#include linux/slab.h +#include linux/uaccess.h +#include linux/vfio.h +#include linux/workqueue.h +#include linux/hugetlb.h +#include linux/msi.h +#include asm/fsl_pamu_stash.h + +#include vfio_iommu_common.h + +#define DRIVER_VERSION
Re: [PATCH] On unmap, flush IOMMU TLB and return correct size
On Wed, 2013-09-25 at 19:52 +0100, David Woodhouse wrote: On Wed, 2013-09-25 at 11:36 -0600, Alex Williamson wrote: On Wed, 2013-09-25 at 17:05 +0100, David Woodhouse wrote: Why would it ever care? If it *happens* to map something that can use large pages, yay!. If it subsequently breaks apart those large pages by unmapping 4KiB in the middle, let the IOMMU driver break that apart. Can this be done atomically? I thought part of the reason for this interface was that iommu drivers typically couldn't replace a huge page with multiple smaller pages in the presence of DMA. For the Intel IOMMU it can. You can atomically change from a large page entry, to a pointer to a full set of smaller page tables. Do the IOTLB flush, and at no time is there an interruption in service. Cool Not sure if this is true for *all* IOMMU hardware; I'd be perfectly happy to accept a variant of Jörg's proposal that we should only ever unmap exactly the same range that we mapped. Except we should allow the unmapping of adjacent regions together; just not a partial unmap of something that was mapped in one go. Well, except if we've just trusted the IOMMU driver to add a device behind a non-SP capable IOMMU to our domain and convert the page tables, that partial unmap is no longer partial and now we get different behavior than before so we can't depend on that adjacent unmapping. Seriously, just the address and the size. Let the IOMMU code deal with whether it can *actually* use large pages on the particular set of IOMMUs that are in use for the devices you've added to the domain so far. You're contradicting yourself here. Just let the iommu deal with it, but now you're raising concerns that Intel may be mixing super page IOMMU hardware with non-super page IOMMU hardware on the same box, so I do need to be concerned, yet there's no interface to discover super page support. No contradiction. Do Not Concern Yourself. Just tell the IOMMU what you want mapped, and where. 
Let *it* worry about whether it can use superpages or not. Like it already *does*. I'd love to be able to do that, but... What happens if my IOMMU domain makes use of super pages and then I add a new device behind a new IOMMU without hardware super page support? Currently, you end up with the domain happily including superpages, and the less capable IOMMU that you added later won't cope. This is the trouble with trusting the iommu driver. ;) What we probably *ought* to do is walk the page tables and convert any pre-existing superpages to small pages, at the time we add the non-SP-capable IOMMU. And then we need to figure out how to handle that in the proposed interface changes above since it changes the unmap behavior to the naive user. There's also the question of whether the IOMMU driver should re-evaluate super pages when the less capable IOMMU is removed from the domain. FWIW we currently screw up the handling of cache-coherent vs. non-coherent page tables too. That one wants a wbinvd somewhere when we add a non-coherent IOMMU to the domain. You're not selling the trust the IOMMU driver story very well here. Can we assume that the IOMMU_CACHE flag (SNP) is ignored appropriately by non-coherent IOMMUs? Is there any downside to ignoring it and always setting SNP in the IOMMU page tables? AMD IOMMU ignores it, but it's also always cache coherent. Thanks, Alex ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH] On unmap, flush IOMMU TLB and return correct size
On Wed, 2013-09-25 at 13:44 -0600, Alex Williamson wrote: On Wed, 2013-09-25 at 19:52 +0100, David Woodhouse wrote: On Wed, 2013-09-25 at 11:36 -0600, Alex Williamson wrote: On Wed, 2013-09-25 at 17:05 +0100, David Woodhouse wrote: Why would it ever care? If it *happens* to map something that can use large pages, yay!. If it subsequently breaks apart those large pages by unmapping 4KiB in the middle, let the IOMMU driver break that apart. Can this be done atomically? I thought part of the reason for this interface was that iommu drivers typically couldn't replace a huge page with multiple smaller pages in the presence of DMA. For the Intel IOMMU it can. You can atomically change from a large page entry, to a pointer to a full set of smaller page tables. Do the IOTLB flush, and at no time is there an interruption in service. Cool Not sure if this is true for *all* IOMMU hardware; I'd be perfectly happy to accept a variant of Jörg's proposal that we should only ever unmap exactly the same range that we mapped. Except we should allow the unmapping of adjacent regions together; just not a partial unmap of something that was mapped in one go. Well, except if we've just trusted the IOMMU driver to add a device behind a non-SP capable IOMMU to our domain and convert the page tables, that partial unmap is no longer partial and now we get different behavior than before so we can't depend on that adjacent unmapping. Que? Jörg's proposal was that if you add a mapping at a given address+size, you should always remove *exactly* that address+size. Which will always work exactly the same, regardless of superpages. My slight change to that was that if you also added an *adjacent* mapping at address2+size2, you should be able to unmap both at the same time. Which will *also* always work the same regardless of superpages. Even if your two mappings were also *physically* contiguous, and *could* have used superpages, they probably won't anyway because you mapped them in two parts. 
What happens if my IOMMU domain makes use of super pages and then I add a new device behind a new IOMMU without hardware super page support? Currently, you end up with the domain happily including superpages, and the less capable IOMMU that you added later won't cope. This is the trouble with trusting the iommu driver. ;) Sorry, I should have made it clearer that this is a *bug*. It's not by design. The IOMMU driver ought to get this right, and will do. What we probably *ought* to do is walk the page tables and convert any pre-existing superpages to small pages, at the time we add the non-SP-capable IOMMU. And then we need to figure out how to handle that in the proposed interface changes above since it changes the unmap behavior to the naive user. Isn't that what you'd *expect*? Surely you don't *expect* the breakage you currently get? There's also the question of whether the IOMMU driver should re-evaluate super pages when the less capable IOMMU is removed from the domain. I wouldn't bother to go looking for opportunities to use super pages if we remove the last non-SP-capable IOMMU from the domain. FWIW we currently screw up the handling of cache-coherent vs. non-coherent page tables too. That one wants a wbinvd somewhere when we add a non-coherent IOMMU to the domain. You're not selling the trust the IOMMU driver story very well here. Can we assume that the IOMMU_CACHE flag (SNP) is ignored appropriately by non-coherent IOMMUs? Is there any downside to ignoring it and always setting SNP in the IOMMU page tables? AMD IOMMU ignores it, but it's also always cache coherent. Thanks, SNP is a separate issue. I'm speaking of cache coherency of the hardware page table walk — the feature bit that all the horrid clflush calls are predicated on. Again, this is just a bug. We *should* be getting this right, but don't yet. 
-- dwmw2 smime.p7s Description: S/MIME cryptographic signature ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH] On unmap, flush IOMMU TLB and return correct size
On Wed, 2013-09-25 at 21:11 +0100, David Woodhouse wrote: On Wed, 2013-09-25 at 13:44 -0600, Alex Williamson wrote: On Wed, 2013-09-25 at 19:52 +0100, David Woodhouse wrote: On Wed, 2013-09-25 at 11:36 -0600, Alex Williamson wrote: On Wed, 2013-09-25 at 17:05 +0100, David Woodhouse wrote: Why would it ever care? If it *happens* to map something that can use large pages, yay!. If it subsequently breaks apart those large pages by unmapping 4KiB in the middle, let the IOMMU driver break that apart. Can this be done atomically? I thought part of the reason for this interface was that iommu drivers typically couldn't replace a huge page with multiple smaller pages in the presence of DMA. For the Intel IOMMU it can. You can atomically change from a large page entry, to a pointer to a full set of smaller page tables. Do the IOTLB flush, and at no time is there an interruption in service. Cool Not sure if this is true for *all* IOMMU hardware; I'd be perfectly happy to accept a variant of Jörg's proposal that we should only ever unmap exactly the same range that we mapped. Except we should allow the unmapping of adjacent regions together; just not a partial unmap of something that was mapped in one go. Well, except if we've just trusted the IOMMU driver to add a device behind a non-SP capable IOMMU to our domain and convert the page tables, that partial unmap is no longer partial and now we get different behavior than before so we can't depend on that adjacent unmapping. Que? Jörg's proposal was that if you add a mapping at a given address+size, you should always remove *exactly* that address+size. Which will always work exactly the same, regardless of superpages. My slight change to that was that if you also added an *adjacent* mapping at address2+size2, you should be able to unmap both at the same time. Which will *also* always work the same regardless of superpages. 
Even if your two mappings were also *physically* contiguous, and *could* have used superpages, they probably won't anyway because you mapped them in two parts. Ok, sounds reasonable. I somehow read it to still include some aspect of the fill in the size API we have now. What happens if my IOMMU domain makes use of super pages and then I add a new device behind a new IOMMU without hardware super page support? Currently, you end up with the domain happily including superpages, and the less capable IOMMU that you added later won't cope. This is the trouble with trusting the iommu driver. ;) Sorry, I should have made it clearer that this is a *bug*. It's not by design. The IOMMU driver ought to get this right, and will do. What we probably *ought* to do is walk the page tables and convert any pre-existing superpages to small pages, at the time we add the non-SP-capable IOMMU. And then we need to figure out how to handle that in the proposed interface changes above since it changes the unmap behavior to the naive user. Isn't that what you'd *expect*? Surely you don't *expect* the breakage you currently get? For vfio I currently make the same statement that you're pushing for the IOMMU API; I only guarantee unmapping at the same granularity as the original mapping. So that seems fine to me. There's also the question of whether the IOMMU driver should re-evaluate super pages when the less capable IOMMU is removed from the domain. I wouldn't bother to go looking for opportunities to use super pages if we remove the last non-SP-capable IOMMU from the domain. I predict bugs getting filed if a guest sees a performance hit after adding a device that is not restored when the device is removed. If only we could assume a similar feature set among IOMMUs in a system. FWIW we currently screw up the handling of cache-coherent vs. non-coherent page tables too. That one wants a wbinvd somewhere when we add a non-coherent IOMMU to the domain. 
You're not selling the trust the IOMMU driver story very well here. Can we assume that the IOMMU_CACHE flag (SNP) is ignored appropriately by non-coherent IOMMUs? Is there any downside to ignoring it and always setting SNP in the IOMMU page tables? AMD IOMMU ignores it, but it's also always cache coherent. Thanks, SNP is a separate issue. I'm speaking of cache coherency of the hardware page table walk — the feature bit that all the horrid clflush calls are predicated on. Again, this is just a bug. We *should* be getting this right, but don't yet. And for DMA_PTE_SNP? intel_iommu_map() won't let us set this bit if the domain contains a hardware unit that doesn't support ecap.SC, but it also doesn't update existing mappings. Barring hardware bugs, it seems much easier to unconditionally set DMA_PTE_SNP but still advertise IOMMU_CAP_CACHE_COHERENCY based on the composition of the domain. Otherwise we have to reject adding devices to a domain that change the coherency once DMA mappings are in play or never set IOMMU_CACHE and advertise to KVM that the domain is always non-coherent. Thanks, Alex
Re: [PATCH] On unmap, flush IOMMU TLB and return correct size
On Wed, 2013-09-25 at 15:46 -0600, Alex Williamson wrote: On Wed, 2013-09-25 at 21:11 +0100, David Woodhouse wrote: I wouldn't bother to go looking for opportunities to use super pages if we remove the last non-SP-capable IOMMU from the domain. I predict bugs getting filed if a guest sees a performance hit after adding a device that is not restored when the device is removed. If only we could assume a similar feature set among IOMMUs in a system. Ok, fine. As long as you don't have *too* much mapped, that shouldn't suck too much. FWIW we currently screw up the handling of cache-coherent vs. non-coherent page tables too. That one wants a wbinvd somewhere when we add a non-coherent IOMMU to the domain. You're not selling the trust the IOMMU driver story very well here. Can we assume that the IOMMU_CACHE flag (SNP) is ignored appropriately by non-coherent IOMMUs? Is there any downside to ignoring it and always setting SNP in the IOMMU page tables? AMD IOMMU ignores it, but it's also always cache coherent. Thanks, SNP is a separate issue. I'm speaking of cache coherency of the hardware page table walk — the feature bit that all the horrid clflush calls are predicated on. Again, this is just a bug. We *should* be getting this right, but don't yet. And for DMA_PTE_SNP? intel_iommu_map() won't let us set this bit if the domain contains a hardware unit that doesn't support ecap.SC, but it also doesn't update existing mappings. Barring hardware bugs, it seems much easier to unconditionally set DMA_PTE_SNP but still advertise IOMMU_CAP_CACHE_COHERENCY based on the composition of the domain. Otherwise we have to reject adding devices to a domain that change the coherency once DMA mappings are in play or never set IOMMU_CACHE and advertise to KVM that the domain is always non-coherent. Thanks, It's not that the domain is non-coherent, is it? The SNP bit allows you to *force* a PCIe DMA request to snoop the CPU cache, even if it didn't *request* that. 
If your device is actually *trying* to do cache-coherent (i.e. snooping) DMA, surely that works regardless of the DMA_PTE_SNP setting? Or am I missing something? -- dwmw2 smime.p7s Description: S/MIME cryptographic signature ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH] On unmap, flush IOMMU TLB and return correct size
On Wed, 2013-09-25 at 23:15 +0100, David Woodhouse wrote: On Wed, 2013-09-25 at 15:46 -0600, Alex Williamson wrote: On Wed, 2013-09-25 at 21:11 +0100, David Woodhouse wrote: I wouldn't bother to go looking for opportunities to use super pages if we remove the last non-SP-capable IOMMU from the domain. I predict bugs getting filed if a guest sees a performance hit after adding a device that is not restored when the device is removed. If only we could assume a similar feature set among IOMMUs in a system. Ok, fine. As long as you don't have *too* much mapped, that shouldn't suck too much. Of course if we knew this was going to happen in advance of attaching the new device, I wonder if we might opt to manage it with a separate IOMMU domain. FWIW we currently screw up the handling of cache-coherent vs. non-coherent page tables too. That one wants a wbinvd somewhere when we add a non-coherent IOMMU to the domain. You're not selling the trust the IOMMU driver story very well here. Can we assume that the IOMMU_CACHE flag (SNP) is ignored appropriately by non-coherent IOMMUs? Is there any downside to ignoring it and always setting SNP in the IOMMU page tables? AMD IOMMU ignores it, but it's also always cache coherent. Thanks, SNP is a separate issue. I'm speaking of cache coherency of the hardware page table walk — the feature bit that all the horrid clflush calls are predicated on. Again, this is just a bug. We *should* be getting this right, but don't yet. And for DMA_PTE_SNP? intel_iommu_map() won't let us set this bit if the domain contains a hardware unit that doesn't support ecap.SC, but it also doesn't update existing mappings. Barring hardware bugs, it seems much easier to unconditionally set DMA_PTE_SNP but still advertise IOMMU_CAP_CACHE_COHERENCY based on the composition of the domain. 
Otherwise we have to reject adding devices to a domain that change the coherency once DMA mappings are in play or never set IOMMU_CACHE and advertise to KVM that the domain is always non-coherent. Thanks, It's not that the domain is non-coherent, is it? The SNP bit allows you to *force* a PCIe DMA request to snoop the CPU cache, even if it didn't *request* that. If your device is actually *trying* to do cache-coherent (i.e. snooping) DMA, surely that works regardless of the DMA_PTE_SNP setting? Or am I missing something? It's the opposite, devices are trying to do No-Snoop transactions and KVM would really rather prefer wbinvd was a no-op since emulated devices don't need it. We therefore need to toggle KVM's behavior when we have an assigned device attached to an IOMMU domain where the hardware isn't stripping the No-Snoop attribute. Some graphics cards seem to make use of this and will get error codes loading their driver if it doesn't work. Thanks, Alex ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 1/1] x86/iommu: correct ICS register offset
Joerg, Thank you for reviewing this patch. ZhenHua On 09/24/2013 07:05 PM, Joerg Roedel wrote: On Tue, Sep 17, 2013 at 04:38:29PM +0800, ZhenHua wrote: Hi Guys, Though DMAR_ICS_REG is not used yet, I think this patch is necessary. So please take a look at it. You are right, my Spec says the same. It doesn't matter much since the register seems to be unused in the VT-d driver. I applied the patch to iommu/fixes anyway. Joerg . ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
RE: [PATCH 5/7] iommu: supress loff_t compilation error on powerpc
-Original Message- From: Alex Williamson [mailto:alex.william...@redhat.com] Sent: Wednesday, September 25, 2013 10:10 PM To: Bhushan Bharat-R65777 Cc: j...@8bytes.org; b...@kernel.crashing.org; ga...@kernel.crashing.org; linux- ker...@vger.kernel.org; linuxppc-...@lists.ozlabs.org; linux- p...@vger.kernel.org; ag...@suse.de; Wood Scott-B07421; iommu@lists.linux- foundation.org; Bhushan Bharat-R65777 Subject: Re: [PATCH 5/7] iommu: supress loff_t compilation error on powerpc On Thu, 2013-09-19 at 12:59 +0530, Bharat Bhushan wrote: Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com --- drivers/vfio/pci/vfio_pci_rdwr.c |3 ++- 1 files changed, 2 insertions(+), 1 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c index 210db24..8a8156a 100644 --- a/drivers/vfio/pci/vfio_pci_rdwr.c +++ b/drivers/vfio/pci/vfio_pci_rdwr.c @@ -181,7 +181,8 @@ ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf, size_t count, loff_t *ppos, bool iswrite) { int ret; - loff_t off, pos = *ppos VFIO_PCI_OFFSET_MASK; + loff_t off; + u64 pos = (u64 )(*ppos VFIO_PCI_OFFSET_MASK); void __iomem *iomem = NULL; unsigned int rsrc; bool is_ioport; What's the compile error that this fixes? I was getting below error; and after some googling I came to know that this is how it is fixed by other guys. /home/r65777/linux-vfio/drivers/vfio/pci/vfio_pci_rdwr.c:193: undefined reference to `__cmpdi2' /home/r65777/linux-vfio/drivers/vfio/pci/vfio_pci_rdwr.c:193: undefined reference to `__cmpdi2' Thanks -Bharat ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
RE: [PATCH 6/7] vfio: moving some functions in common file
-Original Message- From: linux-pci-ow...@vger.kernel.org [mailto:linux-pci-ow...@vger.kernel.org] On Behalf Of Alex Williamson Sent: Wednesday, September 25, 2013 10:33 PM To: Bhushan Bharat-R65777 Cc: j...@8bytes.org; b...@kernel.crashing.org; ga...@kernel.crashing.org; linux- ker...@vger.kernel.org; linuxppc-...@lists.ozlabs.org; linux- p...@vger.kernel.org; ag...@suse.de; Wood Scott-B07421; iommu@lists.linux- foundation.org; Bhushan Bharat-R65777 Subject: Re: [PATCH 6/7] vfio: moving some functions in common file On Thu, 2013-09-19 at 12:59 +0530, Bharat Bhushan wrote: Some function defined in vfio_iommu_type1.c were common and we want to use these for FSL IOMMU (PAMU) and iommu-none driver. So some of them are moved to vfio_iommu_common.c I think we can do more of that but we will take this step by step. Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com --- drivers/vfio/Makefile|4 +- drivers/vfio/vfio_iommu_common.c | 235 ++ drivers/vfio/vfio_iommu_common.h | 30 + drivers/vfio/vfio_iommu_type1.c | 206 +- 4 files changed, 268 insertions(+), 207 deletions(-) create mode 100644 drivers/vfio/vfio_iommu_common.c create mode 100644 drivers/vfio/vfio_iommu_common.h diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile index 72bfabc..c5792ec 100644 --- a/drivers/vfio/Makefile +++ b/drivers/vfio/Makefile @@ -1,4 +1,4 @@ obj-$(CONFIG_VFIO) += vfio.o -obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o -obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o +obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_common.o +vfio_iommu_type1.o +obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_common.o +vfio_iommu_spapr_tce.o obj-$(CONFIG_VFIO_PCI) += pci/ diff --git a/drivers/vfio/vfio_iommu_common.c b/drivers/vfio/vfio_iommu_common.c new file mode 100644 index 000..8bdc0ea --- /dev/null +++ b/drivers/vfio/vfio_iommu_common.c @@ -0,0 +1,235 @@ +/* + * VFIO: Common code for vfio IOMMU support + * + * Copyright (C) 2012 Red Hat, Inc. All rights reserved. 
+ * Author: Alex Williamson alex.william...@redhat.com + * Author: Bharat Bhushan bharat.bhus...@freescale.com + * + * This program is free software; you can redistribute it and/or +modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Derived from original vfio: + * Copyright 2010 Cisco Systems, Inc. All rights reserved. + * Author: Tom Lyon, p...@cisco.com + */ + +#include linux/compat.h +#include linux/device.h +#include linux/fs.h +#include linux/iommu.h +#include linux/module.h +#include linux/mm.h +#include linux/pci.h /* pci_bus_type */ +#include linux/rbtree.h +#include linux/sched.h +#include linux/slab.h +#include linux/uaccess.h +#include linux/vfio.h +#include linux/workqueue.h Please cleanup includes on both the source and target files. You obviously don't need linux/pci.h here for one. Will do. + +static bool disable_hugepages; +module_param_named(disable_hugepages, + disable_hugepages, bool, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(disable_hugepages, +Disable VFIO IOMMU support for IOMMU hugepages.); + +struct vwork { + struct mm_struct*mm; + longnpage; + struct work_struct work; +}; + +/* delayed decrement/increment for locked_vm */ void +vfio_lock_acct_bg(struct work_struct *work) { + struct vwork *vwork = container_of(work, struct vwork, work); + struct mm_struct *mm; + + mm = vwork-mm; + down_write(mm-mmap_sem); + mm-locked_vm += vwork-npage; + up_write(mm-mmap_sem); + mmput(mm); + kfree(vwork); +} + +void vfio_lock_acct(long npage) +{ + struct vwork *vwork; + struct mm_struct *mm; + + if (!current-mm || !npage) + return; /* process exited or nothing to do */ + + if (down_write_trylock(current-mm-mmap_sem)) { + current-mm-locked_vm += npage; + up_write(current-mm-mmap_sem); + return; + } + + /* +* Couldn't get mmap_sem lock, so must setup to update +* mm-locked_vm later. 
If locked_vm were atomic, we +* wouldn't need this silliness +*/ + vwork = kmalloc(sizeof(struct vwork), GFP_KERNEL); + if (!vwork) + return; + mm = get_task_mm(current); + if (!mm) { + kfree(vwork); + return; + } + INIT_WORK(vwork-work, vfio_lock_acct_bg); + vwork-mm = mm; + vwork-npage = npage; + schedule_work(vwork-work); +} + +/* + * Some mappings aren't backed by a struct page, for example an +mmap'd + * MMIO range for our own or another device. These use a different + * pfn