[PATCH 1/1] iommu/vt-d: Decouple igfx_off from graphic identity mapping
A kernel command called igfx_off was introduced in commit <ba39592764ed> ("Intel IOMMU: Intel IOMMU driver"). This command allows the user to disable the IOMMU dedicated to SOC-integrated graphic devices. Commit <9452618e7462> ("iommu/intel: disable DMAR for g4x integrated gfx") used this mechanism to disable the graphic-dedicated IOMMU for some problematic devices. Later, more problematic graphic devices were added to the list by commit <1f76249cc3beb> ("iommu/vt-d: Declare Broadwell igfx dmar support snafu"). On the other hand, commit <19943b0e30b05> ("intel-iommu: Unify hardware and software passthrough support") uses the identity domain for graphic devices if CONFIG_DMAR_BROKEN_GFX_WA is selected. + if (iommu_pass_through) + iommu_identity_mapping = 1; +#ifdef CONFIG_DMAR_BROKEN_GFX_WA + else + iommu_identity_mapping = 2; +#endif ... static int iommu_should_identity_map(struct pci_dev *pdev, int startup) { +if (iommu_identity_mapping == 2) +return IS_GFX_DEVICE(pdev); ... In the following driver evolution, CONFIG_DMAR_BROKEN_GFX_WA and quirk_iommu_igfx() are mixed together, causing confusion in the driver's device_def_domain_type callback. On one hand, dmar_map_gfx is used to turn off the graphic-dedicated IOMMU as a workaround for some buggy hardware; on the other hand, for those graphic devices, IDENTITY mapping is required for the IOMMU core. Commit <4b8d18c0c986> ("iommu/vt-d: Remove INTEL_IOMMU_BROKEN_GFX_WA") has removed the CONFIG_DMAR_BROKEN_GFX_WA option, so the IDENTITY_DOMAIN requirement for graphic devices is no longer needed. Therefore, this requirement can be removed from device_def_domain_type() and igfx_off can be made independent. 
Fixes: 4b8d18c0c986 ("iommu/vt-d: Remove INTEL_IOMMU_BROKEN_GFX_WA") Signed-off-by: Lu Baolu --- drivers/iommu/intel/iommu.c | 19 ++- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index fbbf8fda22f3..57a986524502 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -217,12 +217,11 @@ int intel_iommu_sm = IS_ENABLED(CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON); int intel_iommu_enabled = 0; EXPORT_SYMBOL_GPL(intel_iommu_enabled); -static int dmar_map_gfx = 1; static int intel_iommu_superpage = 1; static int iommu_identity_mapping; static int iommu_skip_te_disable; +static int disable_igfx_dedicated_iommu; -#define IDENTMAP_GFX 2 #define IDENTMAP_AZALIA4 const struct iommu_ops intel_iommu_ops; @@ -261,7 +260,7 @@ static int __init intel_iommu_setup(char *str) no_platform_optin = 1; pr_info("IOMMU disabled\n"); } else if (!strncmp(str, "igfx_off", 8)) { - dmar_map_gfx = 0; + disable_igfx_dedicated_iommu = 1; pr_info("Disable GFX device mapping\n"); } else if (!strncmp(str, "forcedac", 8)) { pr_warn("intel_iommu=forcedac deprecated; use iommu.forcedac instead\n"); @@ -2196,9 +2195,6 @@ static int device_def_domain_type(struct device *dev) if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev)) return IOMMU_DOMAIN_IDENTITY; - - if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev)) - return IOMMU_DOMAIN_IDENTITY; } return 0; @@ -2499,9 +2495,6 @@ static int __init init_dmars(void) iommu_set_root_entry(iommu); } - if (!dmar_map_gfx) - iommu_identity_mapping |= IDENTMAP_GFX; - check_tylersburg_isoch(); ret = si_domain_init(hw_pass_through); @@ -2592,7 +2585,7 @@ static void __init init_no_remapping_devices(void) /* This IOMMU has *only* gfx devices. 
Either bypass it or set the gfx_mapped flag, as appropriate */ drhd->gfx_dedicated = 1; - if (!dmar_map_gfx) + if (disable_igfx_dedicated_iommu) drhd->ignored = 1; } } @@ -4621,7 +4614,7 @@ static void quirk_iommu_igfx(struct pci_dev *dev) return; pci_info(dev, "Disabling IOMMU for graphics on this chipset\n"); - dmar_map_gfx = 0; + disable_igfx_dedicated_iommu = 1; } /* G4x/GM45 integrated gfx dmar support is totally busted. */ @@ -4702,8 +4695,8 @@ static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev) if (!(ggc & GGC_MEMORY_VT_ENABLED)) { pci_info(dev, "BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n"); - dmar_map_gfx = 0; - } else if (dmar_
[PATCH v2 1/1] iommu/vt-d: Remove INTEL_IOMMU_BROKEN_GFX_WA
Commit 62edf5dc4a524 ("intel-iommu: Restore DMAR_BROKEN_GFX_WA option for broken graphics drivers") was introduced 15 years ago as a temporary workaround for graphics drivers that used physical addresses for DMA and avoided DMA APIs. This workaround was disabled by default. As 15 years have passed, it is expected that graphics driver developers have migrated their drivers to use kernel DMA APIs. Therefore, this workaround is no longer required and could be removed. The Intel iommu driver also provides an "igfx_off" option to turn off the DMA translation for the graphic dedicated IOMMU. Hence, there is really no good reason to keep this config option. Suggested-by: Kevin Tian Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian --- drivers/iommu/intel/iommu.c | 4 drivers/iommu/intel/Kconfig | 11 --- 2 files changed, 15 deletions(-) Change log: v2: - Add igfx_off option to commit message and Cc Intel graphic maintainers. v1: https://lore.kernel.org/linux-iommu/20240127064512.16744-1-baolu...@linux.intel.com/ diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 6fb5f6fceea1..fc52fcd786aa 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -2660,10 +2660,6 @@ static int __init init_dmars(void) iommu_set_root_entry(iommu); } -#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA - dmar_map_gfx = 0; -#endif - if (!dmar_map_gfx) iommu_identity_mapping |= IDENTMAP_GFX; diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig index 012cd2541a68..d2d34eb28d94 100644 --- a/drivers/iommu/intel/Kconfig +++ b/drivers/iommu/intel/Kconfig @@ -64,17 +64,6 @@ config INTEL_IOMMU_DEFAULT_ON one is found. If this option is not selected, DMAR support can be enabled by passing intel_iommu=on to the kernel. -config INTEL_IOMMU_BROKEN_GFX_WA - bool "Workaround broken graphics drivers (going away soon)" - depends on BROKEN && X86 - help - Current Graphics drivers tend to use physical address - for DMA and avoid using DMA APIs. 
Setting this config - option permits the IOMMU driver to set a unity map for - all the OS-visible memory. Hence the driver can continue - to use physical addresses for DMA, at least until this - option is removed in the 2.6.32 kernel. - config INTEL_IOMMU_FLOPPY_WA def_bool y depends on X86 -- 2.34.1
Re: [Intel-gfx] [PATCH] drm/i915: Use per device iommu check
On 2021/11/25 19:47, Robin Murphy wrote: On 2021-11-25 10:42, Tvrtko Ursulin wrote: From: Tvrtko Ursulin With both integrated and discrete Intel GPUs in a system, the current global check of intel_iommu_gfx_mapped, as done from intel_vtd_active() may not be completely accurate. In this patch we add i915 parameter to intel_vtd_active() in order to prepare it for multiple GPUs and we also change the check away from Intel specific intel_iommu_gfx_mapped (global exported by the Intel IOMMU driver) to probing the presence of IOMMU domain on a specific device using iommu_get_domain_for_dev(). FWIW the way you have it now is functionally equivalent to using device_iommu_mapped(), which I think might be slightly clearer for the current intent, but I don't have a significantly strong preference (after all, this *was* the de-facto way of checking before device_iommu_mapped() was introduced, and there are still other examples of it around). So from the IOMMU perspective, Acked-by: Robin Murphy Perhaps the AGP driver could also be tweaked and intel_iommu_gfx_mapped cleaned away entirely, but I'll leave that for Baolu to think about :) I fully agreed with Robin. I prefer device_iommu_mapped() more than iommu_get_domain_for_dev(). "iommu_get_domain_for_dev(dev) == NULL" simply means that the device does not have any domain attached. Although at present, it is equivalent to device DMAing without IOMMU translation. But I'm sure it will change in the future. With device_iommu_mapped() replaced, Reviewed-by: Lu Baolu Best regards, baolu
Re: [Intel-gfx] [PATCH] drm/i915: Use per device iommu check
On 11/11/21 11:18 PM, Tvrtko Ursulin wrote: On 10/11/2021 14:37, Robin Murphy wrote: On 2021-11-10 14:11, Tvrtko Ursulin wrote: On 10/11/2021 12:35, Lu Baolu wrote: On 2021/11/10 20:08, Tvrtko Ursulin wrote: On 10/11/2021 12:04, Lu Baolu wrote: On 2021/11/10 17:30, Tvrtko Ursulin wrote: On 10/11/2021 07:12, Lu Baolu wrote: Hi Tvrtko, On 2021/11/9 20:17, Tvrtko Ursulin wrote: From: Tvrtko Ursulin On igfx + dgfx setups, it appears that intel_iommu=igfx_off option only disables the igfx iommu. Stop relying on global intel_iommu_gfx_mapped and probe presence of iommu domain per device to accurately reflect its status. Signed-off-by: Tvrtko Ursulin Cc: Lu Baolu --- Baolu, is my understanding here correct? Maybe I am confused by both intel_iommu_gfx_mapped and dmar_map_gfx being globals in the intel_iommu driver. But it certainly appears the setup can assign some iommu ops (and assign the discrete i915 to iommu group) when those two are set to off. diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e967cd08f23e..9fb38a54f1fe 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1763,26 +1763,27 @@ static inline bool run_as_guest(void) #define HAS_D12_PLANE_MINIMIZATION(dev_priv) (IS_ROCKETLAKE(dev_priv) || \ IS_ALDERLAKE_S(dev_priv)) -static inline bool intel_vtd_active(void) +static inline bool intel_vtd_active(struct drm_i915_private *i915) { -#ifdef CONFIG_INTEL_IOMMU - if (intel_iommu_gfx_mapped) + if (iommu_get_domain_for_dev(i915->drm.dev)) return true; -#endif /* Running as a guest, we assume the host is enforcing VT'd */ return run_as_guest(); } Have you verified this change? I am afraid that iommu_get_domain_for_dev() always gets a valid iommu domain even intel_iommu_gfx_mapped == 0. 
Yes it seems to work as is: default: # grep -i iommu /sys/kernel/debug/dri/*/i915_capabilities /sys/kernel/debug/dri/0/i915_capabilities:iommu: enabled /sys/kernel/debug/dri/1/i915_capabilities:iommu: enabled intel_iommu=igfx_off: # grep -i iommu /sys/kernel/debug/dri/*/i915_capabilities /sys/kernel/debug/dri/0/i915_capabilities:iommu: disabled /sys/kernel/debug/dri/1/i915_capabilities:iommu: enabled On my system dri device 0 is integrated graphics and 1 is discrete. The drm device 0 has a dedicated iommu. When the user request igfx not mapped, the VT-d implementation will turn it off to save power. But for shared iommu, you definitely will get it enabled. Sorry I am not following, what exactly do you mean? Is there a platform with integrated graphics without a dedicated iommu, in which case intel_iommu=igfx_off results in intel_iommu_gfx_mapped == 0 and iommu_get_domain_for_dev returning non-NULL? Your code always work for an igfx with a dedicated iommu. This might be always true on today's platforms. But from driver's point of view, we should not make such assumption. For example, if the iommu implementation decides not to turn off the graphic iommu (perhaps due to some hw quirk or for graphic virtualization), your code will be broken. If I got it right, this would go back to your earlier recommendation to have the check look like this: static bool intel_vtd_active(struct drm_i915_private *i915) { struct iommu_domain *domain; domain = iommu_get_domain_for_dev(i915->drm.dev); if (domain && (domain->type & __IOMMU_DOMAIN_PAGING)) return true; ... This would be okay as a first step? Elsewhere in the thread Robin suggested looking at the dev->dma_ops and comparing against iommu_dma_ops. These two solutions would be effectively the same? Effectively, yes. 
See iommu_setup_dma_ops() - the only way to end up with iommu_dma_ops is if a managed translation domain is present; if the IOMMU is present but the default domain type has been set to passthrough (either globally or forced for the given device) it will do nothing and leave you with dma-direct, while if the IOMMU has been ignored entirely then it should never even be called. Thus it neatly encapsulates what you're after here. One concern I have is whether the pass-through mode truly does nothing or addresses perhaps still go through the dmar hardware just with no translation? Pass-through mode means the latter. If latter then most like for like change is actually exactly what the first version of my patch did. That is replace intel_iommu_gfx_mapped with a plain non-NULL check on iommu_get_domain_for_dev. Depends on what you want here, #1) the graphic device works in iommu pass-through mode - device have an iommu - but iommu does no translation - the dma transactions go through iommu with the same destination memory address specified by the device; #2) the graphic device works without a system iommu - the iommu is off - there's no iommu on the path of DMA transaction. My
Re: [Intel-gfx] [PATCH] drm/i915: Use per device iommu check
On 11/11/21 11:06 PM, Tvrtko Ursulin wrote: On 10/11/2021 12:35, Lu Baolu wrote: On 2021/11/10 20:08, Tvrtko Ursulin wrote: On 10/11/2021 12:04, Lu Baolu wrote: On 2021/11/10 17:30, Tvrtko Ursulin wrote: On 10/11/2021 07:12, Lu Baolu wrote: Hi Tvrtko, On 2021/11/9 20:17, Tvrtko Ursulin wrote: From: Tvrtko Ursulin On igfx + dgfx setups, it appears that intel_iommu=igfx_off option only disables the igfx iommu. Stop relying on global intel_iommu_gfx_mapped and probe presence of iommu domain per device to accurately reflect its status. Signed-off-by: Tvrtko Ursulin Cc: Lu Baolu --- Baolu, is my understanding here correct? Maybe I am confused by both intel_iommu_gfx_mapped and dmar_map_gfx being globals in the intel_iommu driver. But it certainly appears the setup can assign some iommu ops (and assign the discrete i915 to iommu group) when those two are set to off. diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e967cd08f23e..9fb38a54f1fe 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1763,26 +1763,27 @@ static inline bool run_as_guest(void) #define HAS_D12_PLANE_MINIMIZATION(dev_priv) (IS_ROCKETLAKE(dev_priv) || \ IS_ALDERLAKE_S(dev_priv)) -static inline bool intel_vtd_active(void) +static inline bool intel_vtd_active(struct drm_i915_private *i915) { -#ifdef CONFIG_INTEL_IOMMU - if (intel_iommu_gfx_mapped) + if (iommu_get_domain_for_dev(i915->drm.dev)) return true; -#endif /* Running as a guest, we assume the host is enforcing VT'd */ return run_as_guest(); } Have you verified this change? I am afraid that iommu_get_domain_for_dev() always gets a valid iommu domain even intel_iommu_gfx_mapped == 0. 
Yes it seems to work as is: default: # grep -i iommu /sys/kernel/debug/dri/*/i915_capabilities /sys/kernel/debug/dri/0/i915_capabilities:iommu: enabled /sys/kernel/debug/dri/1/i915_capabilities:iommu: enabled intel_iommu=igfx_off: # grep -i iommu /sys/kernel/debug/dri/*/i915_capabilities /sys/kernel/debug/dri/0/i915_capabilities:iommu: disabled /sys/kernel/debug/dri/1/i915_capabilities:iommu: enabled On my system dri device 0 is integrated graphics and 1 is discrete. The drm device 0 has a dedicated iommu. When the user request igfx not mapped, the VT-d implementation will turn it off to save power. But for shared iommu, you definitely will get it enabled. Sorry I am not following, what exactly do you mean? Is there a platform with integrated graphics without a dedicated iommu, in which case intel_iommu=igfx_off results in intel_iommu_gfx_mapped == 0 and iommu_get_domain_for_dev returning non-NULL? Your code always work for an igfx with a dedicated iommu. This might be always true on today's platforms. But from driver's point of view, we should not make such assumption. For example, if the iommu implementation decides not to turn off the graphic iommu (perhaps due to some hw quirk or for graphic virtualization), your code will be broken. I tried your suggestion (checking for __IOMMU_DOMAIN_PAGING) and it works better, however I have observed one odd behaviour (for me at least). In short - why does the DMAR mode for the discrete device change depending on igfx_off parameter? 
Consider the laptop has these two graphics cards: # cat /sys/kernel/debug/dri/0/name i915 dev=:00:02.0 unique=:00:02.0 # integrated # cat /sys/kernel/debug/dri/1/name i915 dev=:03:00.0 unique=:03:00.0 # discrete Booting with different options: === default / intel_iommu=on # cat /sys/class/iommu/dmar0/devices/:00:02.0/iommu_group/type DMA-FQ # cat /sys/class/iommu/dmar2/devices/:03:00.0/iommu_group/type DMA-FQ # grep -i iommu /sys/kernel/debug/dri/*/i915_capabilities /sys/kernel/debug/dri/0/i915_capabilities:iommu: enabled /sys/kernel/debug/dri/1/i915_capabilities:iommu: enabled All good. intel_iommu=igfx_off ## no dmar0 in sysfs # cat /sys/class/iommu/dmar2/devices/:03:00.0/iommu_group/type identity Unexpected!? # grep -i iommu /sys/kernel/debug/dri/*/i915_capabilities /sys/kernel/debug/dri/0/i915_capabilities:iommu: disabled /sys/kernel/debug/dri/1/i915_capabilities:iommu: disabled # At least the i915 patch detects it correctly. intel_iommu=off --- ## no dmar0 in sysfs ## no dmar2 in sysfs # grep -i iommu /sys/kernel/debug/dri/*/i915_capabilities /sys/kernel/debug/dri/0/i915_capabilities:iommu: disabled /sys/kernel/debug/dri/1/i915_capabilities:iommu: disabled All good. The fact discrete graphics changes from translated to pass-through when igfx_off is set is surprising to me. Is this a bug? The existing VT-d implementation doesn't distinguish igfx from dgfx. It only checks whether the device is of a display class: #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY) When igfx_off
Re: [Intel-gfx] [PATCH] drm/i915: Use per device iommu check
On 2021/11/10 20:08, Tvrtko Ursulin wrote: On 10/11/2021 12:04, Lu Baolu wrote: On 2021/11/10 17:30, Tvrtko Ursulin wrote: On 10/11/2021 07:12, Lu Baolu wrote: Hi Tvrtko, On 2021/11/9 20:17, Tvrtko Ursulin wrote: From: Tvrtko Ursulin On igfx + dgfx setups, it appears that intel_iommu=igfx_off option only disables the igfx iommu. Stop relying on global intel_iommu_gfx_mapped and probe presence of iommu domain per device to accurately reflect its status. Signed-off-by: Tvrtko Ursulin Cc: Lu Baolu --- Baolu, is my understanding here correct? Maybe I am confused by both intel_iommu_gfx_mapped and dmar_map_gfx being globals in the intel_iommu driver. But it certainly appears the setup can assign some iommu ops (and assign the discrete i915 to iommu group) when those two are set to off. diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e967cd08f23e..9fb38a54f1fe 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1763,26 +1763,27 @@ static inline bool run_as_guest(void) #define HAS_D12_PLANE_MINIMIZATION(dev_priv) (IS_ROCKETLAKE(dev_priv) || \ IS_ALDERLAKE_S(dev_priv)) -static inline bool intel_vtd_active(void) +static inline bool intel_vtd_active(struct drm_i915_private *i915) { -#ifdef CONFIG_INTEL_IOMMU - if (intel_iommu_gfx_mapped) + if (iommu_get_domain_for_dev(i915->drm.dev)) return true; -#endif /* Running as a guest, we assume the host is enforcing VT'd */ return run_as_guest(); } Have you verified this change? I am afraid that iommu_get_domain_for_dev() always gets a valid iommu domain even intel_iommu_gfx_mapped == 0. 
Yes it seems to work as is: default: # grep -i iommu /sys/kernel/debug/dri/*/i915_capabilities /sys/kernel/debug/dri/0/i915_capabilities:iommu: enabled /sys/kernel/debug/dri/1/i915_capabilities:iommu: enabled intel_iommu=igfx_off: # grep -i iommu /sys/kernel/debug/dri/*/i915_capabilities /sys/kernel/debug/dri/0/i915_capabilities:iommu: disabled /sys/kernel/debug/dri/1/i915_capabilities:iommu: enabled On my system dri device 0 is integrated graphics and 1 is discrete. The drm device 0 has a dedicated iommu. When the user request igfx not mapped, the VT-d implementation will turn it off to save power. But for shared iommu, you definitely will get it enabled. Sorry I am not following, what exactly do you mean? Is there a platform with integrated graphics without a dedicated iommu, in which case intel_iommu=igfx_off results in intel_iommu_gfx_mapped == 0 and iommu_get_domain_for_dev returning non-NULL? Your code always work for an igfx with a dedicated iommu. This might be always true on today's platforms. But from driver's point of view, we should not make such assumption. For example, if the iommu implementation decides not to turn off the graphic iommu (perhaps due to some hw quirk or for graphic virtualization), your code will be broken. Best regards, baolu
Re: [Intel-gfx] [PATCH] drm/i915: Use per device iommu check
On 2021/11/10 17:35, Tvrtko Ursulin wrote: On 10/11/2021 07:25, Lu Baolu wrote: On 2021/11/10 1:35, Tvrtko Ursulin wrote: On 09/11/2021 17:19, Lucas De Marchi wrote: On Tue, Nov 09, 2021 at 12:17:59PM +, Tvrtko Ursulin wrote: From: Tvrtko Ursulin On igfx + dgfx setups, it appears that intel_iommu=igfx_off option only disables the igfx iommu. Stop relying on global intel_iommu_gfx_mapped and probe presence of iommu domain per device to accurately reflect its status. nice, I was just starting to look into this but for another reason: we are adding support for other archs, like aarch64, and the global from here was a problem Yes I realized the other iommu angle as well. To do this properly we need to sort the intel_vtd_active call sites into at least two buckets - which are truly about VT-d and which are just IOMMU. For instance the THP decision in i915_gemfs.c would be "are we behind any iommu". Some other call sites are possibly only about the bugs in the igfx iommu. Not sure if there is a third bucket for any potential differences between igfx iommu and other Intel iommu in case of dgfx. I'd like to hear from Baolu as well to confirm if intel_iommu driver is handling igfx + dgfx correctly in respect to the two global variables I mention in the commit message. I strongly agree that the drivers should call the IOMMU interface directly for portability. For Intel graphic driver, we have two issues: #1) driver asks vt-d driver for identity map with intel_iommu=igfx_off. #2) driver query the status with a global intel_iommu_gfx_mapped. We need to solve these two problems step by step. This patch is definitely a good start point. (I should have really consolidated the thread, but never mind now.) You mean good starting point for the discussion or between your first and second email you started thinking it may even work? Because as I wrote in the other email, it appears to work. 
But I fully accept it may be by accident and you may suggest a proper API to be added to the IOMMU core, which I would then be happy to use. If maybe not immediately, perhaps we could start with this patch and going forward add something more detailed. Like for instance allowing us to query the name/id of the iommu driver in case i915 needs to apply different quirks across them? Not sure how feasible that would be, but at the moment the need does sound plausible to me. The whole story in my mind looks like below: 1. The graphic device driver has its own way to let the user specify iommu-bypass mode. For example, early_param()... 2. If the iommu-bypass mode is set, the graphic device will get below set: dev->dma_ops_bypass = true; 3. The iommu probe procedure will use identity domain (non-mapped mode) for the device because value of dev->dma_ops_bypass is true. That's all. The device driver doesn't need to check iommu for the mapping mode. And this framework will be generic and could be used by other device drivers. The difficulty in doing so is that step 2 must happen before or during the device probe process. At present, I haven't figured out how to do it yet. :-) Best regards, baolu
Re: [Intel-gfx] [PATCH] drm/i915: Use per device iommu check
On 2021/11/10 17:30, Tvrtko Ursulin wrote: On 10/11/2021 07:12, Lu Baolu wrote: Hi Tvrtko, On 2021/11/9 20:17, Tvrtko Ursulin wrote: From: Tvrtko Ursulin On igfx + dgfx setups, it appears that intel_iommu=igfx_off option only disables the igfx iommu. Stop relying on global intel_iommu_gfx_mapped and probe presence of iommu domain per device to accurately reflect its status. Signed-off-by: Tvrtko Ursulin Cc: Lu Baolu --- Baolu, is my understanding here correct? Maybe I am confused by both intel_iommu_gfx_mapped and dmar_map_gfx being globals in the intel_iommu driver. But it certainly appears the setup can assign some iommu ops (and assign the discrete i915 to iommu group) when those two are set to off. diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e967cd08f23e..9fb38a54f1fe 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1763,26 +1763,27 @@ static inline bool run_as_guest(void) #define HAS_D12_PLANE_MINIMIZATION(dev_priv) (IS_ROCKETLAKE(dev_priv) || \ IS_ALDERLAKE_S(dev_priv)) -static inline bool intel_vtd_active(void) +static inline bool intel_vtd_active(struct drm_i915_private *i915) { -#ifdef CONFIG_INTEL_IOMMU - if (intel_iommu_gfx_mapped) + if (iommu_get_domain_for_dev(i915->drm.dev)) return true; -#endif /* Running as a guest, we assume the host is enforcing VT'd */ return run_as_guest(); } Have you verified this change? I am afraid that iommu_get_domain_for_dev() always gets a valid iommu domain even intel_iommu_gfx_mapped == 0. 
Yes it seems to work as is: default: # grep -i iommu /sys/kernel/debug/dri/*/i915_capabilities /sys/kernel/debug/dri/0/i915_capabilities:iommu: enabled /sys/kernel/debug/dri/1/i915_capabilities:iommu: enabled intel_iommu=igfx_off: # grep -i iommu /sys/kernel/debug/dri/*/i915_capabilities /sys/kernel/debug/dri/0/i915_capabilities:iommu: disabled /sys/kernel/debug/dri/1/i915_capabilities:iommu: enabled On my system dri device 0 is integrated graphics and 1 is discrete. The drm device 0 has a dedicated iommu. When the user request igfx not mapped, the VT-d implementation will turn it off to save power. But for shared iommu, you definitely will get it enabled. Best regards, baolu
Re: [Intel-gfx] [PATCH] drm/i915: Use per device iommu check
On 2021/11/10 1:35, Tvrtko Ursulin wrote: On 09/11/2021 17:19, Lucas De Marchi wrote: On Tue, Nov 09, 2021 at 12:17:59PM +, Tvrtko Ursulin wrote: From: Tvrtko Ursulin On igfx + dgfx setups, it appears that intel_iommu=igfx_off option only disables the igfx iommu. Stop relying on global intel_iommu_gfx_mapped and probe presence of iommu domain per device to accurately reflect its status. nice, I was just starting to look into this but for another reason: we are adding support for other archs, like aarch64, and the global from here was a problem Yes I realized the other iommu angle as well. To do this properly we need to sort the intel_vtd_active call sites into at least two buckets - which are truly about VT-d and which are just IOMMU. For instance the THP decision in i915_gemfs.c would be "are we behind any iommu". Some other call sites are possibly only about the bugs in the igfx iommu. Not sure if there is a third bucket for any potential differences between igfx iommu and other Intel iommu in case of dgfx. I'd like to hear from Baolu as well to confirm if intel_iommu driver is handling igfx + dgfx correctly in respect to the two global variables I mention in the commit message. I strongly agree that the drivers should call the IOMMU interface directly for portability. For Intel graphic driver, we have two issues: #1) driver asks vt-d driver for identity map with intel_iommu=igfx_off. #2) driver query the status with a global intel_iommu_gfx_mapped. We need to solve these two problems step by step. This patch is definitely a good start point. Best regards, baolu
Re: [Intel-gfx] [PATCH] drm/i915: Use per device iommu check
Hi Tvrtko, On 2021/11/9 20:17, Tvrtko Ursulin wrote: From: Tvrtko Ursulin On igfx + dgfx setups, it appears that intel_iommu=igfx_off option only disables the igfx iommu. Stop relying on global intel_iommu_gfx_mapped and probe presence of iommu domain per device to accurately reflect its status. Signed-off-by: Tvrtko Ursulin Cc: Lu Baolu --- Baolu, is my understanding here correct? Maybe I am confused by both intel_iommu_gfx_mapped and dmar_map_gfx being globals in the intel_iommu driver. But it certainly appears the setup can assign some iommu ops (and assign the discrete i915 to iommu group) when those two are set to off. diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e967cd08f23e..9fb38a54f1fe 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1763,26 +1763,27 @@ static inline bool run_as_guest(void) #define HAS_D12_PLANE_MINIMIZATION(dev_priv) (IS_ROCKETLAKE(dev_priv) || \ IS_ALDERLAKE_S(dev_priv)) -static inline bool intel_vtd_active(void) +static inline bool intel_vtd_active(struct drm_i915_private *i915) { -#ifdef CONFIG_INTEL_IOMMU - if (intel_iommu_gfx_mapped) + if (iommu_get_domain_for_dev(i915->drm.dev)) return true; -#endif /* Running as a guest, we assume the host is enforcing VT'd */ return run_as_guest(); } Have you verified this change? I am afraid that iommu_get_domain_for_dev() always gets a valid iommu domain even intel_iommu_gfx_mapped == 0. A possible way could look like this: static bool intel_vtd_active(struct drm_i915_private *i915) { struct iommu_domain *domain; domain = iommu_get_domain_for_dev(i915->drm.dev); if (domain && (domain->type & __IOMMU_DOMAIN_PAGING)) return true; ... ... } Actually I don't like this either since it checks the domain->type out of the iommu subsystem. We could refactor this later by export an iommu interface for this check. Best regards, baolu
Re: [Intel-gfx] [PATCH 1/4] iommu/vt-d: Disable superpage for Geminilake igfx
On 7/14/21 4:30 AM, Ville Syrjälä wrote: On Tue, Jul 13, 2021 at 09:34:09AM +0800, Lu Baolu wrote: On 7/12/21 11:47 PM, Ville Syrjälä wrote: On Mon, Jul 12, 2021 at 07:23:07AM +0800, Lu Baolu wrote: On 7/10/21 12:47 AM, Ville Syrjala wrote: From: Ville Syrjälä While running "gem_exec_big --r single" from igt-gpu-tools on Geminilake as soon as a 2M mapping is made I tend to get a DMAR write fault. Strangely the faulting address is always a 4K page and usually very far away from the 2M page that got mapped. But if no 2M mappings get used I can't reproduce the fault. I also tried to dump the PTE for the faulting address but it actually looks correct to me (ie. definitely seems to have the write bit set): DMAR: DRHD: handling fault status reg 2 DMAR: [DMA Write] Request device [00:02.0] PASID fault addr 7fa8a78000 [fault reason 05] PTE Write access is not set DMAR: fault 7fa8a78000 (level=1) PTE = 149efc003 So not really sure what's going on and this might just be full on duct tape, but it seems to work here. The machine has now survived a whole day running that test whereas with superpage enabled it fails in less than a minute usually. TODO: might be nice to disable superpage only for the igfx iommu instead of both iommus If all these quirks are about igfx dedicated iommu's, I would suggest to disable superpage only for the igfx ones. Sure. Unfortunately there's no convenient mechanism to do that in the iommu driver that I can immediately see. So not something I can just whip up easily. Since you're actually familiar with the driver maybe you can come up with a decent solution for that? How about something like below? [no compile, no test...] 
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 1131b8efb050..2d51ef288a9e 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -338,6 +338,7 @@ static int intel_iommu_strict; static int intel_iommu_superpage = 1; static int iommu_identity_mapping; static int iommu_skip_te_disable; +static int iommu_skip_igfx_superpage; #define IDENTMAP_GFX 2 #define IDENTMAP_AZALIA 4 @@ -652,6 +653,27 @@ static bool domain_update_iommu_snooping(struct intel_iommu *skip) return ret; } +static bool domain_use_super_page(struct dmar_domain *domain) +{ + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + bool ret = true; + + if (!intel_iommu_superpage) + return false; + + rcu_read_lock(); + for_each_active_iommu(iommu, drhd) { + if (drhd->gfx_dedicated && iommu_skip_igfx_superpage) { + ret = false; + break ^ Missing semicolon. Othwerwise seems to work great here. Thanks. Are you going to turn this into a proper patch, or do you want me to just squash this into my patches and repost? Please go ahead with a new version. Best regards, baolu ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 1/4] iommu/vt-d: Disable superpage for Geminilake igfx
On 7/12/21 11:47 PM, Ville Syrjälä wrote: On Mon, Jul 12, 2021 at 07:23:07AM +0800, Lu Baolu wrote: On 7/10/21 12:47 AM, Ville Syrjala wrote: From: Ville Syrjälä While running "gem_exec_big --r single" from igt-gpu-tools on Geminilake as soon as a 2M mapping is made I tend to get a DMAR write fault. Strangely the faulting address is always a 4K page and usually very far away from the 2M page that got mapped. But if no 2M mappings get used I can't reproduce the fault. I also tried to dump the PTE for the faulting address but it actually looks correct to me (ie. definitely seems to have the write bit set): DMAR: DRHD: handling fault status reg 2 DMAR: [DMA Write] Request device [00:02.0] PASID fault addr 7fa8a78000 [fault reason 05] PTE Write access is not set DMAR: fault 7fa8a78000 (level=1) PTE = 149efc003 So not really sure what's going on and this might just be full on duct tape, but it seems to work here. The machine has now survived a whole day running that test whereas with superpage enabled it fails in less than a minute usually. TODO: might be nice to disable superpage only for the igfx iommu instead of both iommus If all these quirks are about igfx dedicated iommu's, I would suggest to disable superpage only for the igfx ones. Sure. Unfortunately there's no convenient mechanism to do that in the iommu driver that I can immediately see. So not something I can just whip up easily. Since you're actually familiar with the driver maybe you can come up with a decent solution for that? How about something like below? [no compile, no test...] 
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 1131b8efb050..2d51ef288a9e 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -338,6 +338,7 @@ static int intel_iommu_strict; static int intel_iommu_superpage = 1; static int iommu_identity_mapping; static int iommu_skip_te_disable; +static int iommu_skip_igfx_superpage; #define IDENTMAP_GFX 2 #define IDENTMAP_AZALIA4 @@ -652,6 +653,27 @@ static bool domain_update_iommu_snooping(struct intel_iommu *skip) return ret; } +static bool domain_use_super_page(struct dmar_domain *domain) +{ + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + bool ret = true; + + if (!intel_iommu_superpage) + return false; + + rcu_read_lock(); + for_each_active_iommu(iommu, drhd) { + if (drhd->gfx_dedicated && iommu_skip_igfx_superpage) { + ret = false; + break + } + } + rcu_read_unlock(); + + return ret; +} + static int domain_update_iommu_superpage(struct dmar_domain *domain, struct intel_iommu *skip) { @@ -659,7 +681,7 @@ static int domain_update_iommu_superpage(struct dmar_domain *domain, struct intel_iommu *iommu; int mask = 0x3; - if (!intel_iommu_superpage) + if (!domain_use_super_page(domain)) return 0; /* set iommu_superpage to the smallest common denominator */ @@ -5656,6 +5678,14 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163A, quirk_iommu_igfx); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx); +static void quirk_skip_igfx_superpage(struct pci_dev *dev) +{ + pci_info(dev, "Disabling IOMMU superpage for graphics on this chipset\n"); + iommu_skip_igfx_superpage = 1; +} + +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x3184, quirk_skip_igfx_superpage); + static void quirk_iommu_rwbf(struct pci_dev *dev) { if (risky_device(dev)) Best regards, baolu ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 1/4] iommu/vt-d: Disable superpage for Geminilake igfx
On 7/10/21 12:47 AM, Ville Syrjala wrote: From: Ville Syrjälä While running "gem_exec_big --r single" from igt-gpu-tools on Geminilake as soon as a 2M mapping is made I tend to get a DMAR write fault. Strangely the faulting address is always a 4K page and usually very far away from the 2M page that got mapped. But if no 2M mappings get used I can't reproduce the fault. I also tried to dump the PTE for the faulting address but it actually looks correct to me (ie. definitely seems to have the write bit set): DMAR: DRHD: handling fault status reg 2 DMAR: [DMA Write] Request device [00:02.0] PASID fault addr 7fa8a78000 [fault reason 05] PTE Write access is not set DMAR: fault 7fa8a78000 (level=1) PTE = 149efc003 So not really sure what's going on and this might just be full on duct tape, but it seems to work here. The machine has now survived a whole day running that test whereas with superpage enabled it fails in less than a minute usually. TODO: might be nice to disable superpage only for the igfx iommu instead of both iommus If all these quirks are about igfx dedicated iommu's, I would suggest to disable superpage only for the igfx ones. 
Best regards, baolu TODO: would be nice to use the macros from include/drm/i915_pciids.h, but can't do that with DECLARE_PCI_FIXUP_HEADER() Cc: David Woodhouse Cc: Lu Baolu Cc: io...@lists.linux-foundation.org Signed-off-by: Ville Syrjälä --- drivers/iommu/intel/iommu.c | 10 ++ 1 file changed, 10 insertions(+) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 19c7888cbb86..4fff2c9c86af 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -5617,6 +5617,16 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163A, quirk_iommu_igfx); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx); +static void quirk_iommu_nosp(struct pci_dev *dev) +{ + pci_info(dev, "Disabling IOMMU superpage for graphics on this chipset\n"); + intel_iommu_superpage = 0; +} + +/* Geminilake igfx appears to have issues with superpage */ +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x3184, quirk_iommu_nosp); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x3185, quirk_iommu_nosp); + static void quirk_iommu_rwbf(struct pci_dev *dev) { if (risky_device(dev)) ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v4 0/7] Convert the intel iommu driver to the dma-iommu api
On 2020/11/3 18:54, Joerg Roedel wrote: Hi, On Tue, Nov 03, 2020 at 11:58:26AM +0200, Joonas Lahtinen wrote: Would that work for you? We intend to send the feature pull requests to DRM for 5.11 in the upcoming weeks. For the IOMMU side it is best to include the workaround for now. When the DRM fixes are merged into v5.11-rc1 together with this conversion, it can be reverted and will not be in 5.11-final. Okay! So I will keep the workaround and send a new version (mostly rebase) to Will. Best regards, baolu ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v4 0/7] Convert the intel iommu driver to the dma-iommu api
On 11/2/20 7:52 PM, Tvrtko Ursulin wrote: On 02/11/2020 02:00, Lu Baolu wrote: Hi Tvrtko, On 10/12/20 4:44 PM, Tvrtko Ursulin wrote: On 29/09/2020 01:11, Lu Baolu wrote: Hi Tvrtko, On 9/28/20 5:44 PM, Tvrtko Ursulin wrote: On 27/09/2020 07:34, Lu Baolu wrote: Hi, The previous post of this series could be found here. https://lore.kernel.org/linux-iommu/20200912032200.11489-1-baolu...@linux.intel.com/ This version introduce a new patch [4/7] to fix an issue reported here. https://lore.kernel.org/linux-iommu/51a1baec-48d1-c0ac-181b-1fba92aa4...@linux.intel.com/ There aren't any other changes. Please help to test and review. Best regards, baolu Lu Baolu (3): iommu: Add quirk for Intel graphic devices in map_sg Since I do have patches to fix i915 to handle this, do we want to co-ordinate the two and avoid having to add this quirk and then later remove it? Or you want to go the staged approach? I have no preference. It depends on which patch goes first. Let the maintainers help here. FYI we have merged the required i915 patches to out tree last week or so. I *think* this means they will go into 5.11. So the i915 specific workaround patch will not be needed in Intel IOMMU. Do you mind telling me what's the status of this fix patch? I tried this series on v5.10-rc1 with the graphic quirk patch dropped. I am still seeing dma faults from graphic device. Hmm back then I thought i915 fixes for this would land in 5.11 so I will stick with that. :) (See my quoted text a paragraph above yours.) What size are those fixes? I am considering pushing this series for v5.11. Is it possible to get some acks for those patches and let them go to Linus through iommu tree? Best regards, baolu ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v4 0/7] Convert the intel iommu driver to the dma-iommu api
Hi Tvrtko, On 10/12/20 4:44 PM, Tvrtko Ursulin wrote: On 29/09/2020 01:11, Lu Baolu wrote: Hi Tvrtko, On 9/28/20 5:44 PM, Tvrtko Ursulin wrote: On 27/09/2020 07:34, Lu Baolu wrote: Hi, The previous post of this series could be found here. https://lore.kernel.org/linux-iommu/20200912032200.11489-1-baolu...@linux.intel.com/ This version introduce a new patch [4/7] to fix an issue reported here. https://lore.kernel.org/linux-iommu/51a1baec-48d1-c0ac-181b-1fba92aa4...@linux.intel.com/ There aren't any other changes. Please help to test and review. Best regards, baolu Lu Baolu (3): iommu: Add quirk for Intel graphic devices in map_sg Since I do have patches to fix i915 to handle this, do we want to co-ordinate the two and avoid having to add this quirk and then later remove it? Or you want to go the staged approach? I have no preference. It depends on which patch goes first. Let the maintainers help here. FYI we have merged the required i915 patches to out tree last week or so. I *think* this means they will go into 5.11. So the i915 specific workaround patch will not be needed in Intel IOMMU. Do you mind telling me what's the status of this fix patch? I tried this series on v5.10-rc1 with the graphic quirk patch dropped. I am still seeing dma faults from graphic device. Best regards, baolu Regards, Tvrtko ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v4 0/7] Convert the intel iommu driver to the dma-iommu api
Hi Tvrtko, On 10/12/20 4:44 PM, Tvrtko Ursulin wrote: On 29/09/2020 01:11, Lu Baolu wrote: Hi Tvrtko, On 9/28/20 5:44 PM, Tvrtko Ursulin wrote: On 27/09/2020 07:34, Lu Baolu wrote: Hi, The previous post of this series could be found here. https://lore.kernel.org/linux-iommu/20200912032200.11489-1-baolu...@linux.intel.com/ This version introduce a new patch [4/7] to fix an issue reported here. https://lore.kernel.org/linux-iommu/51a1baec-48d1-c0ac-181b-1fba92aa4...@linux.intel.com/ There aren't any other changes. Please help to test and review. Best regards, baolu Lu Baolu (3): iommu: Add quirk for Intel graphic devices in map_sg Since I do have patches to fix i915 to handle this, do we want to co-ordinate the two and avoid having to add this quirk and then later remove it? Or you want to go the staged approach? I have no preference. It depends on which patch goes first. Let the maintainers help here. FYI we have merged the required i915 patches to out tree last week or so. I *think* this means they will go into 5.11. So the i915 specific workaround patch will not be needed in Intel IOMMU. Thanks for letting us know this. I will drop the workaround patch and test the whole series after the next rc1. Best regards, baolu ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v4 0/7] Convert the intel iommu driver to the dma-iommu api
Hi Joerg, On 2020/10/1 20:17, Joerg Roedel wrote: Hi Baolu, On Tue, Sep 29, 2020 at 08:11:35AM +0800, Lu Baolu wrote: I have no preference. It depends on which patch goes first. Let the maintainers help here. No preference on my side, except that it is too late for this now to make it into v5.10. Besides that I let the decission up to you when this is ready. Just send me a pull-request when it should get into the iommu-tree. Sure. Best regards, baolu Regards, Joerg ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v4 0/7] Convert the intel iommu driver to the dma-iommu api
Hi Tvrtko, On 9/28/20 5:44 PM, Tvrtko Ursulin wrote: On 27/09/2020 07:34, Lu Baolu wrote: Hi, The previous post of this series could be found here. https://lore.kernel.org/linux-iommu/20200912032200.11489-1-baolu...@linux.intel.com/ This version introduce a new patch [4/7] to fix an issue reported here. https://lore.kernel.org/linux-iommu/51a1baec-48d1-c0ac-181b-1fba92aa4...@linux.intel.com/ There aren't any other changes. Please help to test and review. Best regards, baolu Lu Baolu (3): iommu: Add quirk for Intel graphic devices in map_sg Since I do have patches to fix i915 to handle this, do we want to co-ordinate the two and avoid having to add this quirk and then later remove it? Or you want to go the staged approach? I have no preference. It depends on which patch goes first. Let the maintainers help here. Best regards, baolu Regards, Tvrtko iommu/vt-d: Update domain geometry in iommu_ops.at(de)tach_dev iommu/vt-d: Cleanup after converting to dma-iommu ops Tom Murphy (4): iommu: Handle freelists when using deferred flushing in iommu drivers iommu: Add iommu_dma_free_cpu_cached_iovas() iommu: Allow the dma-iommu api to use bounce buffers iommu/vt-d: Convert intel iommu driver to the iommu ops .../admin-guide/kernel-parameters.txt | 5 - drivers/iommu/dma-iommu.c | 228 - drivers/iommu/intel/Kconfig | 1 + drivers/iommu/intel/iommu.c | 901 +++--- include/linux/dma-iommu.h | 8 + include/linux/iommu.h | 1 + 6 files changed, 336 insertions(+), 808 deletions(-) ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v4 7/7] iommu/vt-d: Cleanup after converting to dma-iommu ops
Some cleanups after converting the driver to use dma-iommu ops. - Remove nobounce option; - Cleanup and simplify the path in domain mapping. Signed-off-by: Lu Baolu --- .../admin-guide/kernel-parameters.txt | 5 -- drivers/iommu/intel/iommu.c | 90 ++- 2 files changed, 28 insertions(+), 67 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index a1068742a6df..0d11ef43d314 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1866,11 +1866,6 @@ Note that using this option lowers the security provided by tboot because it makes the system vulnerable to DMA attacks. - nobounce [Default off] - Disable bounce buffer for untrusted devices such as - the Thunderbolt devices. This will treat the untrusted - devices as the trusted ones, hence might expose security - risks of DMA attacks. intel_idle.max_cstate= [KNL,HW,ACPI,X86] 0 disables intel_idle and fall back on acpi_idle. diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 69ccf92ab37b..5135d9ba0993 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -355,7 +355,6 @@ static int dmar_forcedac; static int intel_iommu_strict; static int intel_iommu_superpage = 1; static int iommu_identity_mapping; -static int intel_no_bounce; static int iommu_skip_te_disable; #define IDENTMAP_GFX 2 @@ -457,9 +456,6 @@ static int __init intel_iommu_setup(char *str) } else if (!strncmp(str, "tboot_noforce", 13)) { pr_info("Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n"); intel_iommu_tboot_noforce = 1; - } else if (!strncmp(str, "nobounce", 8)) { - pr_info("Intel-IOMMU: No bounce buffer. 
This could expose security risks of DMA attacks\n"); - intel_no_bounce = 1; } str += strcspn(str, ","); @@ -2277,15 +2273,14 @@ static inline int hardware_largepage_caps(struct dmar_domain *domain, return level; } -static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, - struct scatterlist *sg, unsigned long phys_pfn, - unsigned long nr_pages, int prot) +static int +__domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, +unsigned long phys_pfn, unsigned long nr_pages, int prot) { struct dma_pte *first_pte = NULL, *pte = NULL; - phys_addr_t pteval; - unsigned long sg_res = 0; unsigned int largepage_lvl = 0; unsigned long lvl_pages = 0; + phys_addr_t pteval; u64 attr; BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1)); @@ -2297,26 +2292,14 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, if (domain_use_first_level(domain)) attr |= DMA_FL_PTE_PRESENT | DMA_FL_PTE_XD | DMA_FL_PTE_US; - if (!sg) { - sg_res = nr_pages; - pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | attr; - } + pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | attr; while (nr_pages > 0) { uint64_t tmp; - if (!sg_res) { - unsigned int pgoff = sg->offset & ~PAGE_MASK; - - sg_res = aligned_nrpages(sg->offset, sg->length); - sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + pgoff; - sg->dma_length = sg->length; - pteval = (sg_phys(sg) - pgoff) | attr; - phys_pfn = pteval >> VTD_PAGE_SHIFT; - } - if (!pte) { - largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res); + largepage_lvl = hardware_largepage_caps(domain, iov_pfn, + phys_pfn, nr_pages); first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, _lvl); if (!pte) @@ -2328,7 +2311,7 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, pteval |= DMA_PTE_LARGE_PAGE; lvl_pages = lvl_to_nr_pages(largepage_lvl); - nr_superpages = sg_res / lvl_pages; + nr_superpages = nr_pages / lvl_pages; end_pfn =
[Intel-gfx] [PATCH v4 1/7] iommu: Handle freelists when using deferred flushing in iommu drivers
From: Tom Murphy Allow the iommu_unmap_fast to return newly freed page table pages and pass the freelist to queue_iova in the dma-iommu ops path. This is useful for iommu drivers (in this case the intel iommu driver) which need to wait for the ioTLB to be flushed before newly free/unmapped page table pages can be freed. This way we can still batch ioTLB free operations and handle the freelists. Signed-off-by: Tom Murphy Signed-off-by: Lu Baolu --- drivers/iommu/dma-iommu.c | 29 +-- drivers/iommu/intel/iommu.c | 55 - include/linux/iommu.h | 1 + 3 files changed, 58 insertions(+), 27 deletions(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index cd6e3c70ebb3..1b8ef3a2cbc3 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -50,6 +50,18 @@ struct iommu_dma_cookie { struct iommu_domain *fq_domain; }; +static void iommu_dma_entry_dtor(unsigned long data) +{ + struct page *freelist = (struct page *)data; + + while (freelist) { + unsigned long p = (unsigned long)page_address(freelist); + + freelist = freelist->freelist; + free_page(p); + } +} + static inline size_t cookie_msi_granule(struct iommu_dma_cookie *cookie) { if (cookie->type == IOMMU_DMA_IOVA_COOKIE) @@ -344,7 +356,7 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, if (!cookie->fq_domain && !iommu_domain_get_attr(domain, DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, ) && attr) { if (init_iova_flush_queue(iovad, iommu_dma_flush_iotlb_all, - NULL)) + iommu_dma_entry_dtor)) pr_warn("iova flush queue initialization failed\n"); else cookie->fq_domain = domain; @@ -441,7 +453,7 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain, } static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie, - dma_addr_t iova, size_t size) + dma_addr_t iova, size_t size, struct page *freelist) { struct iova_domain *iovad = >iovad; @@ -450,7 +462,8 @@ static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie, cookie->msi_iova -= size; else if 
(cookie->fq_domain) /* non-strict mode */ queue_iova(iovad, iova_pfn(iovad, iova), - size >> iova_shift(iovad), 0); + size >> iova_shift(iovad), + (unsigned long)freelist); else free_iova_fast(iovad, iova_pfn(iovad, iova), size >> iova_shift(iovad)); @@ -475,7 +488,7 @@ static void __iommu_dma_unmap(struct device *dev, dma_addr_t dma_addr, if (!cookie->fq_domain) iommu_iotlb_sync(domain, _gather); - iommu_dma_free_iova(cookie, dma_addr, size); + iommu_dma_free_iova(cookie, dma_addr, size, iotlb_gather.freelist); } static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys, @@ -497,7 +510,7 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys, return DMA_MAPPING_ERROR; if (iommu_map_atomic(domain, iova, phys - iova_off, size, prot)) { - iommu_dma_free_iova(cookie, iova, size); + iommu_dma_free_iova(cookie, iova, size, NULL); return DMA_MAPPING_ERROR; } return iova + iova_off; @@ -649,7 +662,7 @@ static void *iommu_dma_alloc_remap(struct device *dev, size_t size, out_free_sg: sg_free_table(); out_free_iova: - iommu_dma_free_iova(cookie, iova, size); + iommu_dma_free_iova(cookie, iova, size, NULL); out_free_pages: __iommu_dma_free_pages(pages, count); return NULL; @@ -900,7 +913,7 @@ static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, return __finalise_sg(dev, sg, nents, iova); out_free_iova: - iommu_dma_free_iova(cookie, iova, iova_len); + iommu_dma_free_iova(cookie, iova, iova_len, NULL); out_restore_sg: __invalidate_sg(sg, nents); return 0; @@ -1194,7 +1207,7 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev, return msi_page; out_free_iova: - iommu_dma_free_iova(cookie, iova, size); + iommu_dma_free_iova(cookie, iova, size, NULL); out_free_page: kfree(msi_page); return NULL; diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 722545f61ba2..fdd514c8b2d4 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1243,17 +1243,17 @@ static 
struct page *dma_pte_clear_level(struct dmar_domain *domain, int level, pages can only
[Intel-gfx] [PATCH v4 6/7] iommu/vt-d: Convert intel iommu driver to the iommu ops
From: Tom Murphy Convert the intel iommu driver to the dma-iommu api. Remove the iova handling and reserve region code from the intel iommu driver. Signed-off-by: Tom Murphy Signed-off-by: Lu Baolu --- drivers/iommu/intel/Kconfig | 1 + drivers/iommu/intel/iommu.c | 742 ++-- 2 files changed, 43 insertions(+), 700 deletions(-) diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig index 5337ee1584b0..28a3d1596c76 100644 --- a/drivers/iommu/intel/Kconfig +++ b/drivers/iommu/intel/Kconfig @@ -13,6 +13,7 @@ config INTEL_IOMMU select DMAR_TABLE select SWIOTLB select IOASID + select IOMMU_DMA help DMA remapping (DMAR) devices support enables independent address translations for Direct Memory Access (DMA) from devices. diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 7d3c73d1e498..69ccf92ab37b 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -41,7 +42,6 @@ #include #include #include -#include #include #include #include @@ -382,9 +382,6 @@ struct device_domain_info *get_domain_info(struct device *dev) DEFINE_SPINLOCK(device_domain_lock); static LIST_HEAD(device_domain_list); -#define device_needs_bounce(d) (!intel_no_bounce && dev_is_pci(d) && \ - to_pci_dev(d)->untrusted) - /* * Iterate over elements in device_domain_list and call the specified * callback @fn against each element. 
@@ -1289,13 +1286,6 @@ static void dma_free_pagelist(struct page *freelist) } } -static void iova_entry_free(unsigned long data) -{ - struct page *freelist = (struct page *)data; - - dma_free_pagelist(freelist); -} - /* iommu handling */ static int iommu_alloc_root_entry(struct intel_iommu *iommu) { @@ -1660,19 +1650,17 @@ static inline void __mapping_notify_one(struct intel_iommu *iommu, iommu_flush_write_buffer(iommu); } -static void iommu_flush_iova(struct iova_domain *iovad) +static void intel_flush_iotlb_all(struct iommu_domain *domain) { - struct dmar_domain *domain; + struct dmar_domain *dmar_domain = to_dmar_domain(domain); int idx; - domain = container_of(iovad, struct dmar_domain, iovad); - - for_each_domain_iommu(idx, domain) { + for_each_domain_iommu(idx, dmar_domain) { struct intel_iommu *iommu = g_iommus[idx]; - u16 did = domain->iommu_did[iommu->seq_id]; + u16 did = dmar_domain->iommu_did[iommu->seq_id]; - if (domain_use_first_level(domain)) - domain_flush_piotlb(iommu, domain, 0, -1, 0); + if (domain_use_first_level(dmar_domain)) + domain_flush_piotlb(iommu, dmar_domain, 0, -1, 0); else iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH); @@ -1954,48 +1942,6 @@ static int domain_detach_iommu(struct dmar_domain *domain, return count; } -static struct iova_domain reserved_iova_list; -static struct lock_class_key reserved_rbtree_key; - -static int dmar_init_reserved_ranges(void) -{ - struct pci_dev *pdev = NULL; - struct iova *iova; - int i; - - init_iova_domain(_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN); - - lockdep_set_class(_iova_list.iova_rbtree_lock, - _rbtree_key); - - /* IOAPIC ranges shouldn't be accessed by DMA */ - iova = reserve_iova(_iova_list, IOVA_PFN(IOAPIC_RANGE_START), - IOVA_PFN(IOAPIC_RANGE_END)); - if (!iova) { - pr_err("Reserve IOAPIC range failed\n"); - return -ENODEV; - } - - /* Reserve all PCI MMIO to avoid peer-to-peer access */ - for_each_pci_dev(pdev) { - struct resource *r; - - for (i = 0; i < PCI_NUM_RESOURCES; 
i++) { - r = >resource[i]; - if (!r->flags || !(r->flags & IORESOURCE_MEM)) - continue; - iova = reserve_iova(_iova_list, - IOVA_PFN(r->start), - IOVA_PFN(r->end)); - if (!iova) { - pci_err(pdev, "Reserve iova for %pR failed\n", r); - return -ENODEV; - } - } - } - return 0; -} - static inline int guestwidth_to_adjustwidth(int gaw) { int agaw; @@ -2018,7 +1964,7 @@ static void domain_exit(struct dmar_domain *domain) /* destroy iovas */ if (domain->domain.type == IOMMU_DOMAIN_DMA) -
[Intel-gfx] [PATCH v4 4/7] iommu: Add quirk for Intel graphic devices in map_sg
Combining the sg segments exposes a bug in the Intel i915 driver which causes visual artifacts and the screen to freeze. This is most likely because of how the i915 handles the returned list. It probably doesn't respect the returned value specifying the number of elements in the list and instead depends on the previous behaviour of the Intel iommu driver which would return the same number of elements in the output list as in the input list. Signed-off-by: Tom Murphy Signed-off-by: Lu Baolu --- drivers/iommu/dma-iommu.c | 27 +++ 1 file changed, 27 insertions(+) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 3526db774611..e7e4d758f51a 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -879,6 +879,33 @@ static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents, unsigned int cur_len = 0, max_len = dma_get_max_seg_size(dev); int i, count = 0; + /* +* The Intel graphic driver is used to assume that the returned +* sg list is not combined. This blocks the efforts of converting +* Intel IOMMU driver to dma-iommu api's. Add this quirk to make the +* device driver work and should be removed once it's fixed in i915 +* driver. 
+*/ + if (IS_ENABLED(CONFIG_DRM_I915) && dev_is_pci(dev) && + to_pci_dev(dev)->vendor == PCI_VENDOR_ID_INTEL && + (to_pci_dev(dev)->class >> 16) == PCI_BASE_CLASS_DISPLAY) { + for_each_sg(sg, s, nents, i) { + unsigned int s_iova_off = sg_dma_address(s); + unsigned int s_length = sg_dma_len(s); + unsigned int s_iova_len = s->length; + + s->offset += s_iova_off; + s->length = s_length; + sg_dma_address(s) = dma_addr + s_iova_off; + sg_dma_len(s) = s_length; + dma_addr += s_iova_len; + + pr_info_once("sg combining disabled due to i915 driver\n"); + } + + return nents; + } + for_each_sg(sg, s, nents, i) { /* Restore this segment's original unaligned fields first */ unsigned int s_iova_off = sg_dma_address(s); -- 2.17.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v4 2/7] iommu: Add iommu_dma_free_cpu_cached_iovas()
From: Tom Murphy Add a iommu_dma_free_cpu_cached_iovas function to allow drivers which use the dma-iommu ops to free cached cpu iovas. Signed-off-by: Tom Murphy Signed-off-by: Lu Baolu --- drivers/iommu/dma-iommu.c | 9 + include/linux/dma-iommu.h | 8 2 files changed, 17 insertions(+) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 1b8ef3a2cbc3..fb84cfa83703 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -50,6 +50,15 @@ struct iommu_dma_cookie { struct iommu_domain *fq_domain; }; +void iommu_dma_free_cpu_cached_iovas(unsigned int cpu, + struct iommu_domain *domain) +{ + struct iommu_dma_cookie *cookie = domain->iova_cookie; + struct iova_domain *iovad = >iovad; + + free_cpu_cached_iovas(cpu, iovad); +} + static void iommu_dma_entry_dtor(unsigned long data) { struct page *freelist = (struct page *)data; diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h index 2112f21f73d8..706b68d1359b 100644 --- a/include/linux/dma-iommu.h +++ b/include/linux/dma-iommu.h @@ -37,6 +37,9 @@ void iommu_dma_compose_msi_msg(struct msi_desc *desc, void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list); +void iommu_dma_free_cpu_cached_iovas(unsigned int cpu, + struct iommu_domain *domain); + #else /* CONFIG_IOMMU_DMA */ struct iommu_domain; @@ -78,5 +81,10 @@ static inline void iommu_dma_get_resv_regions(struct device *dev, struct list_he { } +static inline void iommu_dma_free_cpu_cached_iovas(unsigned int cpu, + struct iommu_domain *domain) +{ +} + #endif /* CONFIG_IOMMU_DMA */ #endif /* __DMA_IOMMU_H */ -- 2.17.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v4 5/7] iommu/vt-d: Update domain geometry in iommu_ops.at(de)tach_dev
The iommu-dma constrains IOVA allocation based on the domain geometry that the driver reports. Update domain geometry every time a domain is attached to or detached from a device. Signed-off-by: Lu Baolu --- drivers/iommu/intel/iommu.c | 16 ++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index fdd514c8b2d4..7d3c73d1e498 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -67,8 +67,8 @@ #define MAX_AGAW_WIDTH 64 #define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT) -#define __DOMAIN_MAX_PFN(gaw) uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1) -#define __DOMAIN_MAX_ADDR(gaw) uint64_t)1) << gaw) - 1) +#define __DOMAIN_MAX_PFN(gaw) uint64_t)1) << ((gaw) - VTD_PAGE_SHIFT)) - 1) +#define __DOMAIN_MAX_ADDR(gaw) uint64_t)1) << (gaw)) - 1) /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR to match. That way, we can use 'unsigned long' for PFNs with impunity. */ @@ -739,6 +739,18 @@ static void domain_update_iommu_cap(struct dmar_domain *domain) */ if (domain->nid == NUMA_NO_NODE) domain->nid = domain_update_device_node(domain); + + /* +* First-level translation restricts the input-address to a +* canonical address (i.e., address bits 63:N have the same +* value as address bit [N-1], where N is 48-bits with 4-level +* paging and 57-bits with 5-level paging). Hence, skip bit +* [N-1]. +*/ + if (domain_use_first_level(domain)) + domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw - 1); + else + domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw); } struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, -- 2.17.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v4 3/7] iommu: Allow the dma-iommu api to use bounce buffers
From: Tom Murphy Allow the dma-iommu api to use bounce buffers for untrusted devices. This is a copy of the intel bounce buffer code. Signed-off-by: Tom Murphy Co-developed-by: Lu Baolu Signed-off-by: Lu Baolu --- drivers/iommu/dma-iommu.c | 163 +++--- 1 file changed, 150 insertions(+), 13 deletions(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index fb84cfa83703..3526db774611 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -21,9 +21,11 @@ #include #include #include +#include #include #include #include +#include struct iommu_dma_msi_page { struct list_headlist; @@ -500,6 +502,31 @@ static void __iommu_dma_unmap(struct device *dev, dma_addr_t dma_addr, iommu_dma_free_iova(cookie, dma_addr, size, iotlb_gather.freelist); } +static void __iommu_dma_unmap_swiotlb(struct device *dev, dma_addr_t dma_addr, + size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + struct iommu_domain *domain = iommu_get_dma_domain(dev); + struct iommu_dma_cookie *cookie = domain->iova_cookie; + struct iova_domain *iovad = >iovad; + phys_addr_t phys; + + phys = iommu_iova_to_phys(domain, dma_addr); + if (WARN_ON(!phys)) + return; + + __iommu_dma_unmap(dev, dma_addr, size); + + if (unlikely(is_swiotlb_buffer(phys))) + swiotlb_tbl_unmap_single(dev, phys, size, + iova_align(iovad, size), dir, attrs); +} + +static bool dev_is_untrusted(struct device *dev) +{ + return dev_is_pci(dev) && to_pci_dev(dev)->untrusted; +} + static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys, size_t size, int prot, u64 dma_mask) { @@ -525,6 +552,55 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys, return iova + iova_off; } +static dma_addr_t __iommu_dma_map_swiotlb(struct device *dev, phys_addr_t phys, + size_t org_size, dma_addr_t dma_mask, bool coherent, + enum dma_data_direction dir, unsigned long attrs) +{ + int prot = dma_info_to_prot(dir, coherent, attrs); + struct iommu_domain *domain = 
iommu_get_dma_domain(dev); + struct iommu_dma_cookie *cookie = domain->iova_cookie; + struct iova_domain *iovad = >iovad; + size_t aligned_size = org_size; + void *padding_start; + size_t padding_size; + dma_addr_t iova; + + /* +* If both the physical buffer start address and size are +* page aligned, we don't need to use a bounce page. +*/ + if (IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev) && + iova_offset(iovad, phys | org_size)) { + aligned_size = iova_align(iovad, org_size); + phys = swiotlb_tbl_map_single(dev, + __phys_to_dma(dev, io_tlb_start), + phys, org_size, aligned_size, dir, attrs); + + if (phys == DMA_MAPPING_ERROR) + return DMA_MAPPING_ERROR; + + /* Cleanup the padding area. */ + padding_start = phys_to_virt(phys); + padding_size = aligned_size; + + if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && + (dir == DMA_TO_DEVICE || +dir == DMA_BIDIRECTIONAL)) { + padding_start += org_size; + padding_size -= org_size; + } + + memset(padding_start, 0, padding_size); + } + + iova = __iommu_dma_map(dev, phys, aligned_size, prot, dma_mask); + if ((iova == DMA_MAPPING_ERROR) && is_swiotlb_buffer(phys)) + swiotlb_tbl_unmap_single(dev, phys, org_size, + aligned_size, dir, attrs); + + return iova; +} + static void __iommu_dma_free_pages(struct page **pages, int count) { while (count--) @@ -697,11 +773,15 @@ static void iommu_dma_sync_single_for_cpu(struct device *dev, { phys_addr_t phys; - if (dev_is_dma_coherent(dev)) + if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev)) return; phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle); - arch_sync_dma_for_cpu(phys, size, dir); + if (!dev_is_dma_coherent(dev)) + arch_sync_dma_for_cpu(phys, size, dir); + + if (is_swiotlb_buffer(phys)) + swiotlb_tbl_sync_single(dev, phys, size, dir, SYNC_FOR_CPU); } static void iommu_dma_sync_single_for_device(struct device *dev, @@ -709,11 +789,15 @@ static void iommu_dma_sync_single_for_device(struct device *dev, { phys_addr_t phys; - if (dev_is_dma_coherent(dev)) + if 
(dev_is_dma_coherent(dev) && !dev_is_untrusted(dev)) return
[Intel-gfx] [PATCH v4 0/7] Convert the intel iommu driver to the dma-iommu api
Hi, The previous post of this series could be found here. https://lore.kernel.org/linux-iommu/20200912032200.11489-1-baolu...@linux.intel.com/ This version introduces a new patch [4/7] to fix an issue reported here. https://lore.kernel.org/linux-iommu/51a1baec-48d1-c0ac-181b-1fba92aa4...@linux.intel.com/ There aren't any other changes. Please help to test and review. Best regards, baolu Lu Baolu (3): iommu: Add quirk for Intel graphic devices in map_sg iommu/vt-d: Update domain geometry in iommu_ops.at(de)tach_dev iommu/vt-d: Cleanup after converting to dma-iommu ops Tom Murphy (4): iommu: Handle freelists when using deferred flushing in iommu drivers iommu: Add iommu_dma_free_cpu_cached_iovas() iommu: Allow the dma-iommu api to use bounce buffers iommu/vt-d: Convert intel iommu driver to the iommu ops .../admin-guide/kernel-parameters.txt | 5 - drivers/iommu/dma-iommu.c | 228 - drivers/iommu/intel/Kconfig | 1 + drivers/iommu/intel/iommu.c | 901 +++--- include/linux/dma-iommu.h | 8 + include/linux/iommu.h | 1 + 6 files changed, 336 insertions(+), 808 deletions(-) -- 2.17.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v3 0/6] Convert the intel iommu driver to the dma-iommu api
Hi Tvrtko, On 9/15/20 4:31 PM, Tvrtko Ursulin wrote: With the previous version of the series I hit a problem on Ivybridge where apparently the dma engine width is not respected. At least that is my layman interpretation of the errors. From the older thread: <3> [209.526605] DMAR: intel_iommu_map: iommu width (39) is not sufficient for the mapped address (008000) Relevant iommu boot related messages are: <6>[ 0.184234] DMAR: Host address width 36 <6>[ 0.184245] DMAR: DRHD base: 0x00fed9 flags: 0x0 <6>[ 0.184288] DMAR: dmar0: reg_base_addr fed9 ver 1:0 cap c020e60262 ecap f0101a <6>[ 0.184308] DMAR: DRHD base: 0x00fed91000 flags: 0x1 <6>[ 0.184337] DMAR: dmar1: reg_base_addr fed91000 ver 1:0 cap c9008020660262 ecap f0105a <6>[ 0.184357] DMAR: RMRR base: 0x00d8d28000 end: 0x00d8d46fff <6>[ 0.184377] DMAR: RMRR base: 0x00db00 end: 0x00df1f <6>[ 0.184398] DMAR-IR: IOAPIC id 2 under DRHD base 0xfed91000 IOMMU 1 <6>[ 0.184414] DMAR-IR: HPET id 0 under DRHD base 0xfed91000 <6>[ 0.184428] DMAR-IR: Queued invalidation will be enabled to support x2apic and Intr-remapping. <6>[ 0.185173] DMAR-IR: Enabled IRQ remapping in x2apic mode <6>[ 0.878934] DMAR: No ATSR found <6>[ 0.878966] DMAR: dmar0: Using Queued invalidation <6>[ 0.879007] DMAR: dmar1: Using Queued invalidation <6>[ 0.915032] DMAR: Intel(R) Virtualization Technology for Directed I/O <6>[ 0.915060] PCI-DMA: Using software bounce buffering for IO (SWIOTLB) <6>[ 0.915084] software IO TLB: mapped [mem 0xc80d4000-0xcc0d4000] (64MB) (Full boot log at https://intel-gfx-ci.01.org/tree/drm-tip/Trybot_7054/fi-ivb-3770/boot0.txt, failures at https://intel-gfx-ci.01.org/tree/drm-tip/Trybot_7054/fi-ivb-3770/igt@i915_selftest@l...@blt.html.) Does this look familiar or at least plausible to you? Is this something your new series has fixed? This happens during attaching a domain to device. It has nothing to do with this patch series. I will look into this issue, but not in this email thread context. 
I am not sure what step is attaching domain to device, but these type messages: <3> [209.526605] DMAR: intel_iommu_map: iommu width (39) is not >> sufficient for the mapped address (008000) They definitely appear to happen at runtime, as i915 is getting exercised by userspace. Can you please check whether below change helps here? diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index c8323a9f8bde..0484c539debc 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -724,6 +724,7 @@ static int domain_update_device_node(struct dmar_domain *domain) /* Some capabilities may be different across iommus */ static void domain_update_iommu_cap(struct dmar_domain *domain) { + domain->geometry.aperture_end = __DOMAIN_MAX_ADDR(dmar_domain->gaw); domain_update_iommu_coherency(domain); domain->iommu_snooping = domain_update_iommu_snooping(NULL); domain->iommu_superpage = domain_update_iommu_superpage(domain, NULL); Best regards, baolu ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v3 0/6] Convert the intel iommu driver to the dma-iommu api
On 9/22/20 7:05 PM, Robin Murphy wrote: With the previous version of the series I hit a problem on Ivybridge where apparently the dma engine width is not respected. At least that is my layman interpretation of the errors. From the older thread: <3> [209.526605] DMAR: intel_iommu_map: iommu width (39) is not sufficient for the mapped address (008000) Relevant iommu boot related messages are: <6>[ 0.184234] DMAR: Host address width 36 <6>[ 0.184245] DMAR: DRHD base: 0x00fed9 flags: 0x0 <6>[ 0.184288] DMAR: dmar0: reg_base_addr fed9 ver 1:0 cap c020e60262 ecap f0101a <6>[ 0.184308] DMAR: DRHD base: 0x00fed91000 flags: 0x1 <6>[ 0.184337] DMAR: dmar1: reg_base_addr fed91000 ver 1:0 cap c9008020660262 ecap f0105a <6>[ 0.184357] DMAR: RMRR base: 0x00d8d28000 end: 0x00d8d46fff <6>[ 0.184377] DMAR: RMRR base: 0x00db00 end: 0x00df1f <6>[ 0.184398] DMAR-IR: IOAPIC id 2 under DRHD base 0xfed91000 IOMMU 1 <6>[ 0.184414] DMAR-IR: HPET id 0 under DRHD base 0xfed91000 <6>[ 0.184428] DMAR-IR: Queued invalidation will be enabled to support x2apic and Intr-remapping. <6>[ 0.185173] DMAR-IR: Enabled IRQ remapping in x2apic mode <6>[ 0.878934] DMAR: No ATSR found <6>[ 0.878966] DMAR: dmar0: Using Queued invalidation <6>[ 0.879007] DMAR: dmar1: Using Queued invalidation <6>[ 0.915032] DMAR: Intel(R) Virtualization Technology for Directed I/O <6>[ 0.915060] PCI-DMA: Using software bounce buffering for IO (SWIOTLB) <6>[ 0.915084] software IO TLB: mapped [mem 0xc80d4000-0xcc0d4000] (64MB) (Full boot log at https://intel-gfx-ci.01.org/tree/drm-tip/Trybot_7054/fi-ivb-3770/boot0.txt, failures at https://intel-gfx-ci.01.org/tree/drm-tip/Trybot_7054/fi-ivb-3770/igt@i915_selftest@l...@blt.html.) Does this look familiar or at least plausible to you? Is this something your new series has fixed? This happens during attaching a domain to device. It has nothing to do with this patch series. I will look into this issue, but not in this email thread context. 
I am not sure what step is attaching domain to device, but these type messages: <3> [209.526605] DMAR: intel_iommu_map: iommu width (39) is not >> sufficient for the mapped address (008000) They definitely appear to happen at runtime, as i915 is getting exercised by userspace. AFAICS this certainly might be related to this series - iommu-dma will Oh! I looked at the wrong function. prepare_domain_attach_device() prints a similar message which made me believe that it was not caused by the this patches series. constrain IOVA allocation based on the domain geometry that the driver reports, which in this case is set only once when first allocating the domain. Thus it looks like both the dmar_domain->gaw adjustment in prepare_domain_attach_device() and the domain_use_first_level() business in intel_alloc_iova() effectively get lost in this conversion, since the domain geometry never gets updated to reflect those additional constraints. Sounds reasonable. I will look into the code and work out a fix. > Robin. Best regards, baolu ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v3 0/6] Convert the intel iommu driver to the dma-iommu api
Hi Logan, On 9/21/20 11:48 PM, Logan Gunthorpe wrote: On 2020-09-20 12:36 a.m., Lu Baolu wrote: Hi Logan, On 2020/9/19 4:47, Logan Gunthorpe wrote: Hi Lu, On 2020-09-11 9:21 p.m., Lu Baolu wrote: Tom Murphy has almost done all the work. His latest patch series was posted here. https://lore.kernel.org/linux-iommu/20200903201839.7327-1-murph...@tcd.ie/ Thanks a lot! This series is a follow-up with below changes: 1. Add a quirk for the i915 driver issue described in Tom's cover letter. 2. Fix several bugs in patch "iommu: Allow the dma-iommu api to use bounce buffers" to make the bounce buffer work for untrusted devices. 3. Several cleanups in iommu/vt-d driver after the conversion. I'm trying to test this on an old Sandy Bridge, but found that I get spammed with warnings on boot. I've put a sample of a few of them below. They all seem to be related to ioat. I had the same issue with Tom's v2 but never saw this on his v1. Have you verified whether this could be reproduced with the latest upstream kernel (without this patch series)? Yes. I am sorry. Just want to make sure I understand you correctly. :-) When you say "yes", do you mean you could reproduce this with pure upstream kernel (5.9-rc6)? Also, it's hitting a warning in the dma-iommu code which would not be hit without this patch set. Without this series, DMA APIs don't go through dma-iommu. Do you mind posting the warning message? Best regards, baolu ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v3 0/6] Convert the intel iommu driver to the dma-iommu api
Hi Logan, On 2020/9/19 4:47, Logan Gunthorpe wrote: Hi Lu, On 2020-09-11 9:21 p.m., Lu Baolu wrote: Tom Murphy has almost done all the work. His latest patch series was posted here. https://lore.kernel.org/linux-iommu/20200903201839.7327-1-murph...@tcd.ie/ Thanks a lot! This series is a follow-up with below changes: 1. Add a quirk for the i915 driver issue described in Tom's cover letter. 2. Fix several bugs in patch "iommu: Allow the dma-iommu api to use bounce buffers" to make the bounce buffer work for untrusted devices. 3. Several cleanups in iommu/vt-d driver after the conversion. I'm trying to test this on an old Sandy Bridge, but found that I get spammed with warnings on boot. I've put a sample of a few of them below. They all seem to be related to ioat. I had the same issue with Tom's v2 but never saw this on his v1. Have you verified whether this could be reproduced with the latest upstream kernel (without this patch series)? Best regards, baolu ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v3 0/6] Convert the intel iommu driver to the dma-iommu api
Hi Tvrtko, On 9/14/20 4:04 PM, Tvrtko Ursulin wrote: Hi, On 12/09/2020 04:21, Lu Baolu wrote: Tom Murphy has almost done all the work. His latest patch series was posted here. https://lore.kernel.org/linux-iommu/20200903201839.7327-1-murph...@tcd.ie/ Thanks a lot! This series is a follow-up with below changes: 1. Add a quirk for the i915 driver issue described in Tom's cover letter. Last week I have copied you on an i915 series which appears to remove the need for this quirk. so if we get those i915 patches reviewed and merged, do you still want to pursue this quirk? It's up to the graphic guys. I don't know the details in i915 driver. I don't think my tests could cover all cases. 2. Fix several bugs in patch "iommu: Allow the dma-iommu api to use bounce buffers" to make the bounce buffer work for untrusted devices. 3. Several cleanups in iommu/vt-d driver after the conversion. With the previous version of the series I hit a problem on Ivybridge where apparently the dma engine width is not respected. At least that is my layman interpretation of the errors. From the older thread: <3> [209.526605] DMAR: intel_iommu_map: iommu width (39) is not sufficient for the mapped address (008000) Relevant iommu boot related messages are: <6>[0.184234] DMAR: Host address width 36 <6>[0.184245] DMAR: DRHD base: 0x00fed9 flags: 0x0 <6>[0.184288] DMAR: dmar0: reg_base_addr fed9 ver 1:0 cap c020e60262 ecap f0101a <6>[0.184308] DMAR: DRHD base: 0x00fed91000 flags: 0x1 <6>[0.184337] DMAR: dmar1: reg_base_addr fed91000 ver 1:0 cap c9008020660262 ecap f0105a <6>[0.184357] DMAR: RMRR base: 0x00d8d28000 end: 0x00d8d46fff <6>[0.184377] DMAR: RMRR base: 0x00db00 end: 0x00df1f <6>[0.184398] DMAR-IR: IOAPIC id 2 under DRHD base 0xfed91000 IOMMU 1 <6>[0.184414] DMAR-IR: HPET id 0 under DRHD base 0xfed91000 <6>[0.184428] DMAR-IR: Queued invalidation will be enabled to support x2apic and Intr-remapping. 
<6>[0.185173] DMAR-IR: Enabled IRQ remapping in x2apic mode <6>[0.878934] DMAR: No ATSR found <6>[0.878966] DMAR: dmar0: Using Queued invalidation <6>[0.879007] DMAR: dmar1: Using Queued invalidation <6>[0.915032] DMAR: Intel(R) Virtualization Technology for Directed I/O <6>[0.915060] PCI-DMA: Using software bounce buffering for IO (SWIOTLB) <6>[0.915084] software IO TLB: mapped [mem 0xc80d4000-0xcc0d4000] (64MB) (Full boot log at https://intel-gfx-ci.01.org/tree/drm-tip/Trybot_7054/fi-ivb-3770/boot0.txt, failures at https://intel-gfx-ci.01.org/tree/drm-tip/Trybot_7054/fi-ivb-3770/igt@i915_selftest@l...@blt.html.) Does this look familiar or at least plausible to you? Is this something your new series has fixed? This happens during attaching a domain to device. It has nothing to do with this patch series. I will look into this issue, but not in this email thread context. Best regards, baolu Regards, Tvrtko Please review and test. Best regards, baolu Lu Baolu (2): iommu: Add quirk for Intel graphic devices in map_sg iommu/vt-d: Cleanup after converting to dma-iommu ops Tom Murphy (4): iommu: Handle freelists when using deferred flushing in iommu drivers iommu: Add iommu_dma_free_cpu_cached_iovas() iommu: Allow the dma-iommu api to use bounce buffers iommu/vt-d: Convert intel iommu driver to the iommu ops .../admin-guide/kernel-parameters.txt | 5 - drivers/iommu/dma-iommu.c | 229 - drivers/iommu/intel/Kconfig | 1 + drivers/iommu/intel/iommu.c | 885 +++--- include/linux/dma-iommu.h | 8 + include/linux/iommu.h | 1 + 6 files changed, 323 insertions(+), 806 deletions(-) ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v3 4/6] iommu: Add quirk for Intel graphic devices in map_sg
Combining the sg segments exposes a bug in the Intel i915 driver which causes visual artifacts and the screen to freeze. This is most likely because of how the i915 handles the returned list. It probably doesn't respect the returned value specifying the number of elements in the list and instead depends on the previous behaviour of the Intel iommu driver which would return the same number of elements in the output list as in the input list. Signed-off-by: Tom Murphy Signed-off-by: Lu Baolu --- drivers/iommu/dma-iommu.c | 27 +++ 1 file changed, 27 insertions(+) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 1a1da22e5a5e..fc19f1fb9413 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -880,6 +880,33 @@ static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents, unsigned int cur_len = 0, max_len = dma_get_max_seg_size(dev); int i, count = 0; + /* +* The Intel graphic driver is used to assume that the returned +* sg list is not combound. This blocks the efforts of converting +* Intel IOMMU driver to dma-iommu api's. Add this quirk to make the +* device driver work and should be removed once it's fixed in i915 +* driver. 
+*/ + if (IS_ENABLED(CONFIG_DRM_I915) && dev_is_pci(dev) && + to_pci_dev(dev)->vendor == PCI_VENDOR_ID_INTEL && + (to_pci_dev(dev)->class >> 16) == PCI_BASE_CLASS_DISPLAY) { + for_each_sg(sg, s, nents, i) { + unsigned int s_iova_off = sg_dma_address(s); + unsigned int s_length = sg_dma_len(s); + unsigned int s_iova_len = s->length; + + s->offset += s_iova_off; + s->length = s_length; + sg_dma_address(s) = dma_addr + s_iova_off; + sg_dma_len(s) = s_length; + dma_addr += s_iova_len; + + pr_info_once("sg combining disabled due to i915 driver\n"); + } + + return nents; + } + for_each_sg(sg, s, nents, i) { /* Restore this segment's original unaligned fields first */ unsigned int s_iova_off = sg_dma_address(s); -- 2.17.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v3 6/6] iommu/vt-d: Cleanup after converting to dma-iommu ops
Some cleanups after converting the driver to use dma-iommu ops. - Remove nobounce option; - Cleanup and simplify the path in domain mapping. Signed-off-by: Lu Baolu --- .../admin-guide/kernel-parameters.txt | 5 -- drivers/iommu/intel/iommu.c | 90 ++- 2 files changed, 28 insertions(+), 67 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index a1068742a6df..0d11ef43d314 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1866,11 +1866,6 @@ Note that using this option lowers the security provided by tboot because it makes the system vulnerable to DMA attacks. - nobounce [Default off] - Disable bounce buffer for untrusted devices such as - the Thunderbolt devices. This will treat the untrusted - devices as the trusted ones, hence might expose security - risks of DMA attacks. intel_idle.max_cstate= [KNL,HW,ACPI,X86] 0 disables intel_idle and fall back on acpi_idle. diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index adc231790e0a..fe2544c95013 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -355,7 +355,6 @@ static int dmar_forcedac; static int intel_iommu_strict; static int intel_iommu_superpage = 1; static int iommu_identity_mapping; -static int intel_no_bounce; static int iommu_skip_te_disable; #define IDENTMAP_GFX 2 @@ -457,9 +456,6 @@ static int __init intel_iommu_setup(char *str) } else if (!strncmp(str, "tboot_noforce", 13)) { pr_info("Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n"); intel_iommu_tboot_noforce = 1; - } else if (!strncmp(str, "nobounce", 8)) { - pr_info("Intel-IOMMU: No bounce buffer. 
This could expose security risks of DMA attacks\n"); - intel_no_bounce = 1; } str += strcspn(str, ","); @@ -2230,15 +2226,14 @@ static inline int hardware_largepage_caps(struct dmar_domain *domain, return level; } -static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, - struct scatterlist *sg, unsigned long phys_pfn, - unsigned long nr_pages, int prot) +static int +__domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, +unsigned long phys_pfn, unsigned long nr_pages, int prot) { struct dma_pte *first_pte = NULL, *pte = NULL; - phys_addr_t pteval; - unsigned long sg_res = 0; unsigned int largepage_lvl = 0; unsigned long lvl_pages = 0; + phys_addr_t pteval; u64 attr; BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1)); @@ -2250,26 +2245,14 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, if (domain_use_first_level(domain)) attr |= DMA_FL_PTE_PRESENT | DMA_FL_PTE_XD | DMA_FL_PTE_US; - if (!sg) { - sg_res = nr_pages; - pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | attr; - } + pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | attr; while (nr_pages > 0) { uint64_t tmp; - if (!sg_res) { - unsigned int pgoff = sg->offset & ~PAGE_MASK; - - sg_res = aligned_nrpages(sg->offset, sg->length); - sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + pgoff; - sg->dma_length = sg->length; - pteval = (sg_phys(sg) - pgoff) | attr; - phys_pfn = pteval >> VTD_PAGE_SHIFT; - } - if (!pte) { - largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res); + largepage_lvl = hardware_largepage_caps(domain, iov_pfn, + phys_pfn, nr_pages); first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, _lvl); if (!pte) @@ -2281,7 +2264,7 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, pteval |= DMA_PTE_LARGE_PAGE; lvl_pages = lvl_to_nr_pages(largepage_lvl); - nr_superpages = sg_res / lvl_pages; + nr_superpages = nr_pages / lvl_pages; end_pfn =
[Intel-gfx] [PATCH v3 2/6] iommu: Add iommu_dma_free_cpu_cached_iovas()
From: Tom Murphy Add a iommu_dma_free_cpu_cached_iovas function to allow drivers which use the dma-iommu ops to free cached cpu iovas. Signed-off-by: Tom Murphy Signed-off-by: Lu Baolu --- drivers/iommu/dma-iommu.c | 9 + include/linux/dma-iommu.h | 8 2 files changed, 17 insertions(+) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 82c071b2d5c8..d06411bd5e08 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -50,6 +50,15 @@ struct iommu_dma_cookie { struct iommu_domain *fq_domain; }; +void iommu_dma_free_cpu_cached_iovas(unsigned int cpu, + struct iommu_domain *domain) +{ + struct iommu_dma_cookie *cookie = domain->iova_cookie; + struct iova_domain *iovad = &cookie->iovad; + + free_cpu_cached_iovas(cpu, iovad); +} + static void iommu_dma_entry_dtor(unsigned long data) { struct page *freelist = (struct page *)data; diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h index 2112f21f73d8..706b68d1359b 100644 --- a/include/linux/dma-iommu.h +++ b/include/linux/dma-iommu.h @@ -37,6 +37,9 @@ void iommu_dma_compose_msi_msg(struct msi_desc *desc, void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list); +void iommu_dma_free_cpu_cached_iovas(unsigned int cpu, + struct iommu_domain *domain); + #else /* CONFIG_IOMMU_DMA */ struct iommu_domain; @@ -78,5 +81,10 @@ static inline void iommu_dma_get_resv_regions(struct device *dev, struct list_he { } +static inline void iommu_dma_free_cpu_cached_iovas(unsigned int cpu, + struct iommu_domain *domain) +{ +} + #endif /* CONFIG_IOMMU_DMA */ #endif /* __DMA_IOMMU_H */ -- 2.17.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v3 5/6] iommu/vt-d: Convert intel iommu driver to the iommu ops
From: Tom Murphy Convert the intel iommu driver to the dma-iommu api. Remove the iova handling and reserve region code from the intel iommu driver. Signed-off-by: Tom Murphy Signed-off-by: Lu Baolu --- drivers/iommu/intel/Kconfig | 1 + drivers/iommu/intel/iommu.c | 742 ++-- 2 files changed, 43 insertions(+), 700 deletions(-) diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig index 5337ee1584b0..28a3d1596c76 100644 --- a/drivers/iommu/intel/Kconfig +++ b/drivers/iommu/intel/Kconfig @@ -13,6 +13,7 @@ config INTEL_IOMMU select DMAR_TABLE select SWIOTLB select IOASID + select IOMMU_DMA help DMA remapping (DMAR) devices support enables independent address translations for Direct Memory Access (DMA) from devices. diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 63ee30c689a7..adc231790e0a 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -41,7 +42,6 @@ #include #include #include -#include #include #include #include @@ -382,9 +382,6 @@ struct device_domain_info *get_domain_info(struct device *dev) DEFINE_SPINLOCK(device_domain_lock); static LIST_HEAD(device_domain_list); -#define device_needs_bounce(d) (!intel_no_bounce && dev_is_pci(d) && \ - to_pci_dev(d)->untrusted) - /* * Iterate over elements in device_domain_list and call the specified * callback @fn against each element. 
@@ -1242,13 +1239,6 @@ static void dma_free_pagelist(struct page *freelist) } } -static void iova_entry_free(unsigned long data) -{ - struct page *freelist = (struct page *)data; - - dma_free_pagelist(freelist); -} - /* iommu handling */ static int iommu_alloc_root_entry(struct intel_iommu *iommu) { @@ -1613,19 +1603,17 @@ static inline void __mapping_notify_one(struct intel_iommu *iommu, iommu_flush_write_buffer(iommu); } -static void iommu_flush_iova(struct iova_domain *iovad) +static void intel_flush_iotlb_all(struct iommu_domain *domain) { - struct dmar_domain *domain; + struct dmar_domain *dmar_domain = to_dmar_domain(domain); int idx; - domain = container_of(iovad, struct dmar_domain, iovad); - - for_each_domain_iommu(idx, domain) { + for_each_domain_iommu(idx, dmar_domain) { struct intel_iommu *iommu = g_iommus[idx]; - u16 did = domain->iommu_did[iommu->seq_id]; + u16 did = dmar_domain->iommu_did[iommu->seq_id]; - if (domain_use_first_level(domain)) - domain_flush_piotlb(iommu, domain, 0, -1, 0); + if (domain_use_first_level(dmar_domain)) + domain_flush_piotlb(iommu, dmar_domain, 0, -1, 0); else iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH); @@ -1907,48 +1895,6 @@ static int domain_detach_iommu(struct dmar_domain *domain, return count; } -static struct iova_domain reserved_iova_list; -static struct lock_class_key reserved_rbtree_key; - -static int dmar_init_reserved_ranges(void) -{ - struct pci_dev *pdev = NULL; - struct iova *iova; - int i; - - init_iova_domain(_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN); - - lockdep_set_class(_iova_list.iova_rbtree_lock, - _rbtree_key); - - /* IOAPIC ranges shouldn't be accessed by DMA */ - iova = reserve_iova(_iova_list, IOVA_PFN(IOAPIC_RANGE_START), - IOVA_PFN(IOAPIC_RANGE_END)); - if (!iova) { - pr_err("Reserve IOAPIC range failed\n"); - return -ENODEV; - } - - /* Reserve all PCI MMIO to avoid peer-to-peer access */ - for_each_pci_dev(pdev) { - struct resource *r; - - for (i = 0; i < PCI_NUM_RESOURCES; 
i++) { - r = >resource[i]; - if (!r->flags || !(r->flags & IORESOURCE_MEM)) - continue; - iova = reserve_iova(_iova_list, - IOVA_PFN(r->start), - IOVA_PFN(r->end)); - if (!iova) { - pci_err(pdev, "Reserve iova for %pR failed\n", r); - return -ENODEV; - } - } - } - return 0; -} - static inline int guestwidth_to_adjustwidth(int gaw) { int agaw; @@ -1971,7 +1917,7 @@ static void domain_exit(struct dmar_domain *domain) /* destroy iovas */ if (domain->domain.type == IOMMU_DOMAIN_DMA) -
[Intel-gfx] [PATCH v3 0/6] Convert the intel iommu driver to the dma-iommu api
Tom Murphy has almost done all the work. His latest patch series was posted here. https://lore.kernel.org/linux-iommu/20200903201839.7327-1-murph...@tcd.ie/ Thanks a lot! This series is a follow-up with below changes: 1. Add a quirk for the i915 driver issue described in Tom's cover letter. 2. Fix several bugs in patch "iommu: Allow the dma-iommu api to use bounce buffers" to make the bounce buffer work for untrusted devices. 3. Several cleanups in iommu/vt-d driver after the conversion. Please review and test. Best regards, baolu Lu Baolu (2): iommu: Add quirk for Intel graphic devices in map_sg iommu/vt-d: Cleanup after converting to dma-iommu ops Tom Murphy (4): iommu: Handle freelists when using deferred flushing in iommu drivers iommu: Add iommu_dma_free_cpu_cached_iovas() iommu: Allow the dma-iommu api to use bounce buffers iommu/vt-d: Convert intel iommu driver to the iommu ops .../admin-guide/kernel-parameters.txt | 5 - drivers/iommu/dma-iommu.c | 229 - drivers/iommu/intel/Kconfig | 1 + drivers/iommu/intel/iommu.c | 885 +++--- include/linux/dma-iommu.h | 8 + include/linux/iommu.h | 1 + 6 files changed, 323 insertions(+), 806 deletions(-) -- 2.17.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v3 3/6] iommu: Allow the dma-iommu api to use bounce buffers
From: Tom Murphy Allow the dma-iommu api to use bounce buffers for untrusted devices. This is a copy of the intel bounce buffer code. Signed-off-by: Tom Murphy Co-developed-by: Lu Baolu Signed-off-by: Lu Baolu --- drivers/iommu/dma-iommu.c | 163 +++--- 1 file changed, 150 insertions(+), 13 deletions(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index d06411bd5e08..1a1da22e5a5e 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -21,9 +21,11 @@ #include #include #include +#include #include #include #include +#include struct iommu_dma_msi_page { struct list_headlist; @@ -498,6 +500,31 @@ static void __iommu_dma_unmap(struct device *dev, dma_addr_t dma_addr, iommu_dma_free_iova(cookie, dma_addr, size, iotlb_gather.freelist); } +static void __iommu_dma_unmap_swiotlb(struct device *dev, dma_addr_t dma_addr, + size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + struct iommu_domain *domain = iommu_get_dma_domain(dev); + struct iommu_dma_cookie *cookie = domain->iova_cookie; + struct iova_domain *iovad = >iovad; + phys_addr_t phys; + + phys = iommu_iova_to_phys(domain, dma_addr); + if (WARN_ON(!phys)) + return; + + __iommu_dma_unmap(dev, dma_addr, size); + + if (unlikely(is_swiotlb_buffer(phys))) + swiotlb_tbl_unmap_single(dev, phys, size, + iova_align(iovad, size), dir, attrs); +} + +static bool dev_is_untrusted(struct device *dev) +{ + return dev_is_pci(dev) && to_pci_dev(dev)->untrusted; +} + static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys, size_t size, int prot, u64 dma_mask) { @@ -523,6 +550,55 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys, return iova + iova_off; } +static dma_addr_t __iommu_dma_map_swiotlb(struct device *dev, phys_addr_t phys, + size_t org_size, dma_addr_t dma_mask, bool coherent, + enum dma_data_direction dir, unsigned long attrs) +{ + int prot = dma_info_to_prot(dir, coherent, attrs); + struct iommu_domain *domain = 
iommu_get_dma_domain(dev); + struct iommu_dma_cookie *cookie = domain->iova_cookie; + struct iova_domain *iovad = &cookie->iovad; + size_t aligned_size = org_size; + void *padding_start; + size_t padding_size; + dma_addr_t iova; + + /* +* If both the physical buffer start address and size are +* page aligned, we don't need to use a bounce page. +*/ + if (IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev) && + iova_offset(iovad, phys | org_size)) { + aligned_size = iova_align(iovad, org_size); + phys = swiotlb_tbl_map_single(dev, + __phys_to_dma(dev, io_tlb_start), + phys, org_size, aligned_size, dir, attrs); + + if (phys == DMA_MAPPING_ERROR) + return DMA_MAPPING_ERROR; + + /* Cleanup the padding area. */ + padding_start = phys_to_virt(phys); + padding_size = aligned_size; + + if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && + (dir == DMA_TO_DEVICE || +dir == DMA_BIDIRECTIONAL)) { + padding_start += org_size; + padding_size -= org_size; + } + + memset(padding_start, 0, padding_size); + } + + iova = __iommu_dma_map(dev, phys, aligned_size, prot, dma_mask); + if ((iova == DMA_MAPPING_ERROR) && is_swiotlb_buffer(phys)) + swiotlb_tbl_unmap_single(dev, phys, org_size, + aligned_size, dir, attrs); + + return iova; +} + static void __iommu_dma_free_pages(struct page **pages, int count) { while (count--) @@ -698,11 +774,15 @@ static void iommu_dma_sync_single_for_cpu(struct device *dev, { phys_addr_t phys; - if (dev_is_dma_coherent(dev)) + if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev)) return; phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle); - arch_sync_dma_for_cpu(phys, size, dir); + if (!dev_is_dma_coherent(dev)) + arch_sync_dma_for_cpu(phys, size, dir); + + if (is_swiotlb_buffer(phys)) + swiotlb_tbl_sync_single(dev, phys, size, dir, SYNC_FOR_CPU); } static void iommu_dma_sync_single_for_device(struct device *dev, @@ -710,11 +790,15 @@ static void iommu_dma_sync_single_for_device(struct device *dev, { phys_addr_t phys; - if (dev_is_dma_coherent(dev)) + if 
(dev_is_dma_coherent(dev) && !dev_is_untrusted(dev)) return
[Intel-gfx] [PATCH v3 1/6] iommu: Handle freelists when using deferred flushing in iommu drivers
From: Tom Murphy Allow the iommu_unmap_fast to return newly freed page table pages and pass the freelist to queue_iova in the dma-iommu ops path. This is useful for iommu drivers (in this case the intel iommu driver) which need to wait for the ioTLB to be flushed before newly free/unmapped page table pages can be freed. This way we can still batch ioTLB free operations and handle the freelists. Signed-off-by: Tom Murphy Signed-off-by: Lu Baolu --- drivers/iommu/dma-iommu.c | 30 ++-- drivers/iommu/intel/iommu.c | 55 - include/linux/iommu.h | 1 + 3 files changed, 59 insertions(+), 27 deletions(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 5141d49a046b..82c071b2d5c8 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -50,6 +50,18 @@ struct iommu_dma_cookie { struct iommu_domain *fq_domain; }; +static void iommu_dma_entry_dtor(unsigned long data) +{ + struct page *freelist = (struct page *)data; + + while (freelist) { + unsigned long p = (unsigned long)page_address(freelist); + + freelist = freelist->freelist; + free_page(p); + } +} + static inline size_t cookie_msi_granule(struct iommu_dma_cookie *cookie) { if (cookie->type == IOMMU_DMA_IOVA_COOKIE) @@ -344,7 +356,8 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, if (!cookie->fq_domain && !iommu_domain_get_attr(domain, DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, &attr) && attr) { cookie->fq_domain = domain; - init_iova_flush_queue(iovad, iommu_dma_flush_iotlb_all, NULL); + init_iova_flush_queue(iovad, iommu_dma_flush_iotlb_all, + iommu_dma_entry_dtor); } if (!dev) @@ -438,7 +451,7 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain, } static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie, - dma_addr_t iova, size_t size) + dma_addr_t iova, size_t size, struct page *freelist) { struct iova_domain *iovad = &cookie->iovad; @@ -447,7 +460,8 @@ static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie, cookie->msi_iova -= size; 
else if (cookie->fq_domain) /* non-strict mode */ queue_iova(iovad, iova_pfn(iovad, iova), - size >> iova_shift(iovad), 0); + size >> iova_shift(iovad), + (unsigned long)freelist); else free_iova_fast(iovad, iova_pfn(iovad, iova), size >> iova_shift(iovad)); @@ -472,7 +486,7 @@ static void __iommu_dma_unmap(struct device *dev, dma_addr_t dma_addr, if (!cookie->fq_domain) iommu_tlb_sync(domain, &iotlb_gather); - iommu_dma_free_iova(cookie, dma_addr, size); + iommu_dma_free_iova(cookie, dma_addr, size, iotlb_gather.freelist); } static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys, @@ -494,7 +508,7 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys, return DMA_MAPPING_ERROR; if (iommu_map_atomic(domain, iova, phys - iova_off, size, prot)) { - iommu_dma_free_iova(cookie, iova, size); + iommu_dma_free_iova(cookie, iova, size, NULL); return DMA_MAPPING_ERROR; } return iova + iova_off; @@ -649,7 +663,7 @@ static void *iommu_dma_alloc_remap(struct device *dev, size_t size, out_free_sg: sg_free_table(&sgt); out_free_iova: - iommu_dma_free_iova(cookie, iova, size); + iommu_dma_free_iova(cookie, iova, size, NULL); out_free_pages: __iommu_dma_free_pages(pages, count); return NULL; @@ -900,7 +914,7 @@ static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, return __finalise_sg(dev, sg, nents, iova); out_free_iova: - iommu_dma_free_iova(cookie, iova, iova_len); + iommu_dma_free_iova(cookie, iova, iova_len, NULL); out_restore_sg: __invalidate_sg(sg, nents); return 0; @@ -1194,7 +1208,7 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev, return msi_page; out_free_iova: - iommu_dma_free_iova(cookie, iova, size); + iommu_dma_free_iova(cookie, iova, size, NULL); out_free_page: kfree(msi_page); return NULL; diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 87b17bac04c2..63ee30c689a7 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1208,17 +1208,17 @@ 
static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level, pages can only be freed after the IOTLB flush has been done. */ s
Re: [Intel-gfx] [PATCH V2 5/5] DO NOT MERGE: iommu: disable list appending in dma-iommu
On 2020/9/9 15:06, Christoph Hellwig wrote: On Wed, Sep 09, 2020 at 09:43:09AM +0800, Lu Baolu wrote: + /* +* The Intel graphic device driver is used to assume that the returned +* sg list is not combound. This blocks the efforts of converting the This adds pointless overly long lines. +* Intel IOMMU driver to dma-iommu api's. Add this quirk to make the +* device driver work and should be removed once it's fixed in i915 +* driver. +*/ + if (dev_is_pci(dev) && + to_pci_dev(dev)->vendor == PCI_VENDOR_ID_INTEL && + (to_pci_dev(dev)->class >> 16) == PCI_BASE_CLASS_DISPLAY) { + for_each_sg(sg, s, nents, i) { + unsigned int s_iova_off = sg_dma_address(s); + unsigned int s_length = sg_dma_len(s); + unsigned int s_iova_len = s->length; + + s->offset += s_iova_off; + s->length = s_length; + sg_dma_address(s) = dma_addr + s_iova_off; + sg_dma_len(s) = s_length; + dma_addr += s_iova_len; + } + + return nents; + } This wants an IS_ENABLED() check. And probably a pr_once reminding of the workaround. Will fix in the next version. Best regards, baolu ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH V2 5/5] DO NOT MERGE: iommu: disable list appending in dma-iommu
Hi Christoph, On 9/8/20 2:23 PM, Christoph Hellwig wrote: On Tue, Sep 08, 2020 at 02:04:53PM +0800, Lu Baolu wrote: Do you mind telling where can I find Marek's series? [PATCH v10 00/30] DRM: fix struct sg_table nents vs. orig_nents misuse on various lists including the iommu one. It seems that more work is needed in i915 driver. I will added below quirk as you suggested. --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -851,6 +851,31 @@ static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents, unsigned int cur_len = 0, max_len = dma_get_max_seg_size(dev); int i, count = 0; + /* +* The Intel graphic device driver is used to assume that the returned +* sg list is not combound. This blocks the efforts of converting the +* Intel IOMMU driver to dma-iommu api's. Add this quirk to make the +* device driver work and should be removed once it's fixed in i915 +* driver. +*/ + if (dev_is_pci(dev) && + to_pci_dev(dev)->vendor == PCI_VENDOR_ID_INTEL && + (to_pci_dev(dev)->class >> 16) == PCI_BASE_CLASS_DISPLAY) { + for_each_sg(sg, s, nents, i) { + unsigned int s_iova_off = sg_dma_address(s); + unsigned int s_length = sg_dma_len(s); + unsigned int s_iova_len = s->length; + + s->offset += s_iova_off; + s->length = s_length; + sg_dma_address(s) = dma_addr + s_iova_off; + sg_dma_len(s) = s_length; + dma_addr += s_iova_len; + } + + return nents; + } + Best regards, baolu ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH V2 5/5] DO NOT MERGE: iommu: disable list appending in dma-iommu
On 2020/9/8 14:23, Christoph Hellwig wrote: On Tue, Sep 08, 2020 at 02:04:53PM +0800, Lu Baolu wrote: Do you mind telling where can I find Marek's series? [PATCH v10 00/30] DRM: fix struct sg_table nents vs. orig_nents misuse on various lists including the iommu one. Get it. Thank you! Best regards, baolu ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH V2 5/5] DO NOT MERGE: iommu: disable list appending in dma-iommu
Hi Christoph, On 9/8/20 1:55 PM, Christoph Hellwig wrote: On Tue, Sep 08, 2020 at 06:36:19AM +0100, Christoph Hellwig wrote: On Mon, Sep 07, 2020 at 09:18:50PM +0100, Tom Murphy wrote: Yeah we talked about passing an attr to map_sg to disable merging at the following microconfernce: https://linuxplumbersconf.org/event/7/contributions/846/ As far as I can remember everyone seemed happy with that solution. I won't be working on this though as I don't have any more time to dedicate to this. It seems Lu Baolu will take over this. I'm absolutely again passing a flag. Tha just invites further abuse. We need a PCI ID based quirk or something else that can't be as easily abused. Also, I looked at i915 and there are just three dma_map_sg callers. The two dmabuf related ones are fixed by Marek in his series, leaving Do you mind telling where can I find Marek's series? Best regards, baolu just the one in i915_gem_gtt_prepare_pages, which does indeed look very fishy. But if that one is so hard to fix it can just be replaced by an open coded for_each_sg loop that contains manual dma_map_page calls. ___ iommu mailing list io...@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] iommu/intel: Handle 36b addressing for x86-32
Hi Chris, On 2020/8/23 0:02, Chris Wilson wrote: Beware that the address size for x86-32 may exceed unsigned long. [0.368971] UBSAN: shift-out-of-bounds in drivers/iommu/intel/iommu.c:128:14 [0.369055] shift exponent 36 is too large for 32-bit type 'long unsigned int' If we don't handle the wide addresses, the pages are mismapped and the device read/writes go astray, detected as DMAR faults and leading to device failure. The behaviour changed (from working to broken) in commit fa954e683178 ("iommu/vt-d: Delegate the dma domain to upper layer"), but commit ("iommu/vt-d: Delegate the dma domain to upper layer") and adjust the title as "iommu/vt-d: Handle 36bit addressing for x86-32" with above two changes, Acked-by: Lu Baolu Best regards, baolu the error looks older. Fixes: fa954e683178 ("iommu/vt-d: Delegate the dma domain to upper layer") Signed-off-by: Chris Wilson Cc: James Sewart Cc: Lu Baolu Cc: Joerg Roedel Cc: # v5.3+ --- drivers/iommu/intel/iommu.c | 14 +++--- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 2e9c8c3d0da4..ba78a2e854f9 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -123,29 +123,29 @@ static inline unsigned int level_to_offset_bits(int level) return (level - 1) * LEVEL_STRIDE; } -static inline int pfn_level_offset(unsigned long pfn, int level) +static inline int pfn_level_offset(u64 pfn, int level) { return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK; } -static inline unsigned long level_mask(int level) +static inline u64 level_mask(int level) { - return -1UL << level_to_offset_bits(level); + return -1ULL << level_to_offset_bits(level); } -static inline unsigned long level_size(int level) +static inline u64 level_size(int level) { - return 1UL << level_to_offset_bits(level); + return 1ULL << level_to_offset_bits(level); } -static inline unsigned long align_to_level(unsigned long pfn, int level) +static inline u64 align_to_level(u64 
pfn, int level) { return (pfn + level_size(level) - 1) & level_mask(level); } static inline unsigned long lvl_to_nr_pages(unsigned int lvl) { - return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH); + return 1UL << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH); } /* VT-d pages must always be _smaller_ than MM pages. Otherwise things ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 02/13] iommu/vt-d: Use dev_iommu_priv_get/set()
Hi Joerg, On 2020/6/25 21:08, Joerg Roedel wrote: From: Joerg Roedel Remove the use of dev->archdata.iommu and use the private per-device pointer provided by IOMMU core code instead. Signed-off-by: Joerg Roedel --- .../gpu/drm/i915/selftests/mock_gem_device.c | 10 -- drivers/iommu/intel/iommu.c| 18 +- For changes in VT-d driver, Reviewed-by: Lu Baolu Best regards, baolu 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 9b105b811f1f..e08601905a64 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -24,6 +24,7 @@ #include #include +#include #include @@ -118,6 +119,9 @@ struct drm_i915_private *mock_gem_device(void) { struct drm_i915_private *i915; struct pci_dev *pdev; +#if IS_ENABLED(CONFIG_IOMMU_API) && defined(CONFIG_INTEL_IOMMU) + struct dev_iommu iommu; +#endif int err; pdev = kzalloc(sizeof(*pdev), GFP_KERNEL); @@ -136,8 +140,10 @@ struct drm_i915_private *mock_gem_device(void) dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); #if IS_ENABLED(CONFIG_IOMMU_API) && defined(CONFIG_INTEL_IOMMU) - /* hack to disable iommu for the fake device; force identity mapping */ - pdev->dev.archdata.iommu = (void *)-1; + /* HACK HACK HACK to disable iommu for the fake device; force identity mapping */ + memset(&iommu, 0, sizeof(iommu)); + iommu.priv = (void *)-1; + pdev->dev.iommu = &iommu; #endif pci_set_drvdata(pdev, i915); diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index d759e7234e98..2ce490c2eab8 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -372,7 +372,7 @@ struct device_domain_info *get_domain_info(struct device *dev) if (!dev) return NULL; - info = dev->archdata.iommu; + info = dev_iommu_priv_get(dev); if (unlikely(info == DUMMY_DEVICE_DOMAIN_INFO || info == DEFER_DEVICE_DOMAIN_INFO)) return NULL; @@ -743,12 +743,12 @@ struct 
context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, static int iommu_dummy(struct device *dev) { - return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO; + return dev_iommu_priv_get(dev) == DUMMY_DEVICE_DOMAIN_INFO; } static bool attach_deferred(struct device *dev) { - return dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO; + return dev_iommu_priv_get(dev) == DEFER_DEVICE_DOMAIN_INFO; } /** @@ -2420,7 +2420,7 @@ static inline void unlink_domain_info(struct device_domain_info *info) list_del(&info->link); list_del(&info->global); if (info->dev) - info->dev->archdata.iommu = NULL; + dev_iommu_priv_set(info->dev, NULL); } static void domain_remove_dev_info(struct dmar_domain *domain) @@ -2453,7 +2453,7 @@ static void do_deferred_attach(struct device *dev) { struct iommu_domain *domain; - dev->archdata.iommu = NULL; + dev_iommu_priv_set(dev, NULL); domain = iommu_get_domain_for_dev(dev); if (domain) intel_iommu_attach_device(domain, dev); @@ -2599,7 +2599,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, list_add(&info->link, &domain->devices); list_add(&info->global, &device_domain_list); if (dev) - dev->archdata.iommu = info; + dev_iommu_priv_set(dev, info); spin_unlock_irqrestore(&device_domain_lock, flags); /* PASID table is mandatory for a PCI device in scalable mode. 
*/ @@ -4004,7 +4004,7 @@ static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev) if (!drhd || drhd->reg_base_addr - vtbar != 0xa000) { pr_warn_once(FW_BUG "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"); add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); - pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO; + dev_iommu_priv_set(>dev, DUMMY_DEVICE_DOMAIN_INFO); } } DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu); @@ -4043,7 +4043,7 @@ static void __init init_no_remapping_devices(void) drhd->ignored = 1; for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) - dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO; + dev_iommu_priv_set(dev, DUMMY_DEVICE_DOMAIN_INFO); } } } @@ -5665,7 +5665,7 @@ static stru
Re: [Intel-gfx] [PATCH 3/8] iommu/vt-d: Remove IOVA handling code from non-dma_ops path
On 2020/3/20 14:30, Tom Murphy wrote: Could we merge patch 1-3 from this series? it just cleans up weird code and merging these patches will cover some of the work needed to move the intel iommu driver to the dma-iommu api in the future. Can you please take a look at this patch series? https://lkml.org/lkml/2020/3/13/1162 It probably makes this series easier. Best regards, baolu On Sat, 21 Dec 2019 at 07:04, Tom Murphy wrote: Remove all IOVA handling code from the non-dma_ops path in the intel iommu driver. There's no need for the non-dma_ops path to keep track of IOVAs. The whole point of the non-dma_ops path is that it allows the IOVAs to be handled separately. The IOVA handling code removed in this patch is pointless. Signed-off-by: Tom Murphy ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 1/8] iommu/vt-d: clean up 32bit si_domain assignment
Hi, On 12/21/19 11:03 PM, Tom Murphy wrote: @@ -5618,9 +5583,13 @@ static int intel_iommu_add_device(struct device *dev) struct iommu_domain *domain; struct intel_iommu *iommu; struct iommu_group *group; + u64 dma_mask = *dev->dma_mask; u8 bus, devfn; int ret; + if (dev->coherent_dma_mask && dev->coherent_dma_mask < dma_mask) + dma_mask = dev->coherent_dma_mask; + iommu = device_to_iommu(dev, , ); if (!iommu) return -ENODEV; @@ -5640,7 +5609,12 @@ static int intel_iommu_add_device(struct device *dev) domain = iommu_get_domain_for_dev(dev); dmar_domain = to_dmar_domain(domain); if (domain->type == IOMMU_DOMAIN_DMA) { - if (device_def_domain_type(dev) == IOMMU_DOMAIN_IDENTITY) { + /* +* We check dma_mask >= dma_get_required_mask(dev) because +* 32 bit DMA falls back to non-identity mapping. +*/ + if (device_def_domain_type(dev) == IOMMU_DOMAIN_IDENTITY && + dma_mask >= dma_get_required_mask(dev)) { ret = iommu_request_dm_for_dev(dev); if (ret) { dmar_remove_one_dev_info(dev); dev->dma_mask is set to 32bit by default. During loading driver, it sets the real dma_mask with dma_set_mask() according to the real capability. Here you will always see 32bit dma_mask for each device. Best regards, baolu ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [RFC PATCH] iommu/vt-d: Fix IOMMU field not populated on device hot re-plug
Hi Janusz, On 9/3/19 3:41 PM, Janusz Krzysztofik wrote: Hi Baolu, On Tuesday, September 3, 2019 3:29:40 AM CEST Lu Baolu wrote: Hi Janusz, On 9/2/19 4:37 PM, Janusz Krzysztofik wrote: I am not saying that keeping data is not acceptable. I just want to check whether there are any other solutions. Then reverting 458b7c8e0dde and applying this patch still resolves the issue for me. No errors appear when mappings are unmapped on device close after the device has been removed, and domain info preserved on device removal is successfully reused on device re-plug. This patch doesn't look good to me although I agree that keeping data is acceptable. It updates dev->archdata.iommu, but leaves the hardware context/pasid table unchanged. This might cause problems somewhere. Is there anything else I can do to help? Can you please tell me how to reproduce the problem? The most simple way to reproduce the issue, assuming there are no non-Intel graphics adapters installed, is to run the following shell commands: #!/bin/sh # load i915 module modprobe i915 # open an i915 device and keep it open in background cat /dev/dri/card0 >/dev/null & sleep 2 # simulate device unplug echo 1 >/sys/class/drm/card0/device/remove # make the background process close the device on exit kill $! I tried to reproduce the issue with above instructions. I got below kernel messages after above operation. Is it the same as what you've seen? I can't find anything explicitly related to iommu except an IOMMU fault generated after device got removed and the DMA translation structures got torn down. Can you please help me to understand how IOMMU driver triggers the issue? 
[ 182.217672] [ cut here ] [ 182.217679] WARNING: CPU: 1 PID: 1285 at drivers/gpu/drm/drm_mode_config.c:495 drm_mode_config_cleanup+0x2cb/0x2e0 [ 182.217680] Modules linked in: nls_iso8859_1 snd_soc_skl snd_soc_sst_ipc snd_soc_sst_dsp snd_hda_ext_core snd_soc_acpi_intel_match snd_soc_acpi snd_soc_core intel_rapl_msr snd_hda_codec_hdmi snd_hda_codec_realtek snd_compress snd_hda_codec_generic ledtrig_audio ac97_bus snd_pcm_dmaengine snd_hda_intel snd_intel_nhlt snd_hda_codec snd_hda_core snd_hwdep snd_pcm intel_rapl_common x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm snd_seq_midi irqbypass snd_seq_midi_event snd_rawmidi crct10dif_pclmul crc32_pclmul ghash_clmulni_intel iwlmvm snd_seq mac80211 libarc4 aesni_intel snd_seq_device snd_timer crypto_simd ipu3_cio2 cryptd glue_helper iwlwifi v4l2_fwnode intel_cstate videobuf2_dma_sg videobuf2_memops intel_rapl_perf asix videobuf2_v4l2 videobuf2_common usbnet mii cfg80211 input_leds serio_raw intel_wmi_thunderbolt snd mei_me videodev soundcore intel_xhci_usb_role_switch mei 8250_dw mc roles intel_pch_thermal hid_sensor_magn_3d hid_sensor_accel_3d hid_sensor_press [ 182.217709] hid_sensor_incl_3d hid_sensor_als hid_sensor_rotation hid_sensor_gyro_3d hid_sensor_trigger industrialio_triggered_buffer kfifo_buf hid_sensor_iio_common industrialio intel_vbtn mac_hid intel_hid acpi_pad sparse_keymap sch_fq_codel parport_pc ppdev lp parport ip_tables x_tables hid_sensor_custom hid_sensor_hub intel_ishtp_hid hid_generic dwc3 ulpi udc_core i2c_designware_platform i2c_designware_core e1000e psmouse i2c_i801 tg3 usbhid dwc3_pci hid intel_ish_ipc intel_lpss_pci intel_ishtp intel_lpss wmi pinctrl_sunrisepoint pinctrl_intel [ 182.217727] CPU: 1 PID: 1285 Comm: bash Not tainted 5.4.0-rc2+ #2704 [ 182.217728] Hardware name: Intel Corporation Skylake Client platform/Skylake Y LPDDR3 RVP3, BIOS SKLSE2R1.R00.B105.B00.1512051901 12/05/2015 [ 182.217731] RIP: 0010:drm_mode_config_cleanup+0x2cb/0x2e0 [ 182.217732] Code: eb 0c 48 8b 
70 48 4c 89 e7 e8 31 f4 ff ff 48 8d 7d a0 e8 d8 95 ff ff 48 85 c0 75 e6 48 8d 7d a0 e8 1a 87 ff ff e9 ef fd ff ff <0f> 0b e9 ed fe ff ff 0f 0b eb 98 e8 05 ec 98 ff 0f 1f 44 00 00 0f [ 182.217733] RSP: 0018:a1c481d37c78 EFLAGS: 00010216 [ 182.217735] RAX: 95c7da326e08 RBX: 95c7dcf6 RCX: 801e [ 182.217735] RDX: 95c7dcf603b8 RSI: 801e RDI: 95c7dcf60390 [ 182.217736] RBP: a1c481d37cd8 R08: R09: 9e035e00 [ 182.217737] R10: a1c481d37be0 R11: 0001 R12: 95c7dcf60250 [ 182.217737] R13: 95c7dcf603b8 R14: a1c481d37ee8 R15: fff2 [ 182.217739] FS: 7f804af7f740() GS:95c7e7a8() knlGS: [ 182.217739] CS: 0010 DS: ES: CR0: 80050033 [ 182.217740] CR2: 7ffebc530cec CR3: 000137e0e003 CR4: 003606e0 [ 182.217741] DR0: DR1: DR2: [ 182.217742] DR3: DR6: fffe0ff0 DR7: 0400 [ 182.217742] Call Trace: [ 182.217748] ? _cond_resched+0x19/0x40 [ 182.217752] intel_modeset_driver_remove+0xd1/0x150 [ 182.217754] i915_driver_remove+0xb8/0x120 [ 182.21775
Re: [Intel-gfx] [RFC PATCH] iommu/vt-d: Fix IOMMU field not populated on device hot re-plug
Hi, On 10/1/19 11:01 PM, Janusz Krzysztofik wrote: Hi Baolu, On Tuesday, September 3, 2019 9:41:23 AM CEST Janusz Krzysztofik wrote: Hi Baolu, On Tuesday, September 3, 2019 3:29:40 AM CEST Lu Baolu wrote: Hi Janusz, On 9/2/19 4:37 PM, Janusz Krzysztofik wrote: I am not saying that keeping data is not acceptable. I just want to check whether there are any other solutions. Then reverting 458b7c8e0dde and applying this patch still resolves the issue for me. No errors appear when mappings are unmapped on device close after the device has been removed, and domain info preserved on device removal is successfully reused on device re-plug. This patch doesn't look good to me although I agree that keeping data is acceptable. Any progress with that? Which mailing list should I watch for updates?\ We had a holiday last week. I will go ahead with reproducing it locally. Feel free to let me know if you have any new proposal. Best regards, Baolu Thanks, Janusz It updates dev->archdata.iommu, but leaves the hardware context/pasid table unchanged. This might cause problems somewhere. Is there anything else I can do to help? Can you please tell me how to reproduce the problem? The most simple way to reproduce the issue, assuming there are no non-Intel graphics adapters installed, is to run the following shell commands: #!/bin/sh # load i915 module modprobe i915 # open an i915 device and keep it open in background cat /dev/dri/card0 >/dev/null & sleep 2 # simulate device unplug echo 1 >/sys/class/drm/card0/device/remove # make the background process close the device on exit kill $! Thanks, Janusz Keeping the per device domain info while device is unplugged is a bit dangerous because info->dev might be a wild pointer. We need to work out a clean fix. Thanks, Janusz Best regards, Baolu ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 5/6] iommu/intel: Declare Broadwell igfx dmar support snafu
Hi, On 9/9/19 7:00 PM, Chris Wilson wrote: Despite the widespread and complete failure of Broadwell integrated graphics when DMAR is enabled, known over the years, we have never been able to root cause the issue. Instead, we let the failure undermine our confidence in the iommu system itself when we should be pushing for it to be always enabled. Quirk away Broadwell and remove the rotten apple. References: https://bugs.freedesktop.org/show_bug.cgi?id=89360 Signed-off-by: Chris Wilson Cc: Lu Baolu This patch looks good to me. Reviewed-by: Lu Baolu -baolu Cc: Martin Peres Cc: Joerg Roedel --- drivers/iommu/intel-iommu.c | 44 + 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index c4e0e4a9ee9e..34f6a3d93ae2 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -5690,20 +5690,46 @@ const struct iommu_ops intel_iommu_ops = { .pgsize_bitmap = INTEL_IOMMU_PGSIZES, }; -static void quirk_iommu_g4x_gfx(struct pci_dev *dev) +static void quirk_iommu_igfx(struct pci_dev *dev) { - /* G4x/GM45 integrated gfx dmar support is totally busted. */ pci_info(dev, "Disabling IOMMU for graphics on this chipset\n"); dmar_map_gfx = 0; } -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx); +/* G4x/GM45 integrated gfx dmar support is totally busted. 
*/ +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_igfx); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_igfx); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_igfx); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_igfx); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_igfx); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_igfx); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_igfx); + +/* Broadwell igfx malfunctions with dmar */ +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1606, quirk_iommu_igfx); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160B, quirk_iommu_igfx); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160E, quirk_iommu_igfx); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1602, quirk_iommu_igfx); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160A, quirk_iommu_igfx); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160D, quirk_iommu_igfx); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1616, quirk_iommu_igfx); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161B, quirk_iommu_igfx); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161E, quirk_iommu_igfx); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1612, quirk_iommu_igfx); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161A, quirk_iommu_igfx); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161D, quirk_iommu_igfx); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1626, quirk_iommu_igfx); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162B, quirk_iommu_igfx); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162E, quirk_iommu_igfx); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1622, quirk_iommu_igfx); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162A, quirk_iommu_igfx); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162D, quirk_iommu_igfx); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1636, quirk_iommu_igfx); 
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163B, quirk_iommu_igfx); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163E, quirk_iommu_igfx); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163A, quirk_iommu_igfx); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx); static void quirk_iommu_rwbf(struct pci_dev *dev) { ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [RFC PATCH] iommu/vt-d: Fix IOMMU field not populated on device hot re-plug
Hi Janusz, On 9/2/19 4:37 PM, Janusz Krzysztofik wrote: I am not saying that keeping data is not acceptable. I just want to check whether there are any other solutions. Then reverting 458b7c8e0dde and applying this patch still resolves the issue for me. No errors appear when mappings are unmapped on device close after the device has been removed, and domain info preserved on device removal is successfully reused on device re-plug. This patch doesn't look good to me although I agree that keeping data is acceptable. It updates dev->archdata.iommu, but leaves the hardware context/pasid table unchanged. This might cause problems somewhere. Is there anything else I can do to help? Can you please tell me how to reproduce the problem? Keeping the per device domain info while device is unplugged is a bit dangerous because info->dev might be a wild pointer. We need to work out a clean fix. Thanks, Janusz Best regards, Baolu
Re: [RFC PATCH] iommu/vt-d: Fix IOMMU field not populated on device hot re-plug
Hi, On 8/29/19 3:58 PM, Janusz Krzysztofik wrote: Hi Baolu, On Thursday, August 29, 2019 3:43:31 AM CEST Lu Baolu wrote: Hi Janusz, On 8/28/19 10:17 PM, Janusz Krzysztofik wrote: We should avoid kernel panic when a intel_unmap() is called against a non-existent domain. Does that mean you suggest to replace BUG_ON(!domain); with something like if (WARN_ON(!domain)) return; and to not care of orphaned mappings left allocated? Is there a way to inform users that their active DMA mappings are no longer valid and they shouldn't call dma_unmap_*()? But we shouldn't expect the IOMMU driver not cleaning up the domain info when a device remove notification comes and wait until all file descriptors being closed, right? Shouldn't then the IOMMU driver take care of cleaning up resources still allocated on device remove before it invalidates and forgets their pointers? You are right. We need to wait until all allocated resources (iova and mappings) to be released. How about registering a callback for BUS_NOTIFY_UNBOUND_DRIVER, and removing the domain info when the driver detachment completes? Device core calls BUS_NOTIFY_UNBOUND_DRIVER on each driver unbind, regardless of a device being removed or not. As long as the device is not unplugged and the BUS_NOTIFY_REMOVED_DEVICE notification not generated, an unbound driver is not a problem here. Moreover, BUS_NOTIFY_UNBOUND_DRIVER is called even before BUS_NOTIFY_REMOVED_DEVICE so that wouldn't help anyway. Last but not least, bus events are independent of the IOMMU driver use via DMA-API it exposes. Fair enough. If keeping data for unplugged devices and reusing it on device re-plug is not acceptable then maybe the IOMMU driver should perform reference counting of its internal resources occupied by DMA-API users and perform cleanups on last release? I am not saying that keeping data is not acceptable. I just want to check whether there are any other solutions. Best regards, Baolu
Re: [Intel-gfx] [RFC PATCH] iommu/vt-d: Fix IOMMU field not populated on device hot re-plug
Hi Janusz, On 8/28/19 10:17 PM, Janusz Krzysztofik wrote: We should avoid kernel panic when a intel_unmap() is called against a non-existent domain. Does that mean you suggest to replace BUG_ON(!domain); with something like if (WARN_ON(!domain)) return; and to not care of orphaned mappings left allocated? Is there a way to inform users that their active DMA mappings are no longer valid and they shouldn't call dma_unmap_*()? But we shouldn't expect the IOMMU driver not cleaning up the domain info when a device remove notification comes and wait until all file descriptors being closed, right? Shouldn't then the IOMMU driver take care of cleaning up resources still allocated on device remove before it invalidates and forgets their pointers? You are right. We need to wait until all allocated resources (iova and mappings) to be released. How about registering a callback for BUS_NOTIFY_UNBOUND_DRIVER, and removing the domain info when the driver detachment completes? Thanks, Janusz Best regards, Baolu ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [RFC PATCH] iommu/vt-d: Fix IOMMU field not populated on device hot re-plug
Hi Janusz, On 8/27/19 5:35 PM, Janusz Krzysztofik wrote: Hi Lu, On Monday, August 26, 2019 10:29:12 AM CEST Lu Baolu wrote: Hi Janusz, On 8/26/19 4:15 PM, Janusz Krzysztofik wrote: Hi Lu, On Friday, August 23, 2019 3:51:11 AM CEST Lu Baolu wrote: Hi, On 8/22/19 10:29 PM, Janusz Krzysztofik wrote: When a perfectly working i915 device is hot unplugged (via sysfs) and hot re-plugged again, its dev->archdata.iommu field is not populated again with an IOMMU pointer. As a result, the device probe fails on DMA mapping error during scratch page setup. It looks like that happens because devices are not detached from their MMUIO bus before they are removed on device unplug. Then, when an already registered device/IOMMU association is identified by the reinstantiated device's bus and function IDs on IOMMU bus re-attach attempt, the device's archdata is not populated with IOMMU information and the bad happens. I'm not sure if this is a proper fix but it works for me so at least it confirms correctness of my analysis results, I believe. So far I haven't been able to identify a good place where the possibly missing IOMMU bus detach on device unplug operation could be added. Which kernel version are you testing with? Does it contain below commit? commit 458b7c8e0dde12d140e3472b80919cbb9ae793f4 Author: Lu Baolu Date: Thu Aug 1 11:14:58 2019 +0800 I was using an internal branch based on drm-tip which didn't contain this commit yet. Fortunately it has been already merged into drm-tip over last weekend and has effectively fixed the issue. Thanks for testing this. My testing appeared not sufficiently exhaustive. The fix indeed resolved my initially discovered issue of not being able to rebind the i915 driver to a re-plugged device, however it brought another, probably more serious problem to light. 
When an open i915 device is hot unplugged, IOMMU bus notifier now cleans up IOMMU info for the device on PCI device remove while the i915 driver is still not released, kept by open file descriptors. Then, on last device close, cleanup attempts lead to kernel panic raised from intel_unmap() on unresolved IOMMU domain. We should avoid kernel panic when a intel_unmap() is called against a non-existent domain. But we shouldn't expect the IOMMU driver not cleaning up the domain info when a device remove notification comes and wait until all file descriptors being closed, right? Best regards, Baolu With commit 458b7c8e0dde reverted and my fix applied, both late device close and device re-plug work for me. However, I can realize that's probably still not a complete solution, possibly missing some protection against reuse of a removed device other than for cleanup. If you think that's the right way to go, I can work more on that. I've had a look at other drivers and found AMD is using somehow similar approach. On the other hand, looking at the IOMMU common code I couldn't identify any arrangement that would support deferred device cleanup. If that approach is not acceptable for Intel IOMMU, please suggest a way you'd like to have it resolved and I can try to implement it. Thanks, Janusz Best regards, Lu Baolu
Re: [Intel-gfx] [RFC PATCH] iommu/vt-d: Fix IOMMU field not populated on device hot re-plug
Hi Janusz, On 8/26/19 4:15 PM, Janusz Krzysztofik wrote: Hi Lu, On Friday, August 23, 2019 3:51:11 AM CEST Lu Baolu wrote: Hi, On 8/22/19 10:29 PM, Janusz Krzysztofik wrote: When a perfectly working i915 device is hot unplugged (via sysfs) and hot re-plugged again, its dev->archdata.iommu field is not populated again with an IOMMU pointer. As a result, the device probe fails on DMA mapping error during scratch page setup. It looks like that happens because devices are not detached from their MMUIO bus before they are removed on device unplug. Then, when an already registered device/IOMMU association is identified by the reinstantiated device's bus and function IDs on IOMMU bus re-attach attempt, the device's archdata is not populated with IOMMU information and the bad happens. I'm not sure if this is a proper fix but it works for me so at least it confirms correctness of my analysis results, I believe. So far I haven't been able to identify a good place where the possibly missing IOMMU bus detach on device unplug operation could be added. Which kernel version are you testing with? Does it contain below commit? commit 458b7c8e0dde12d140e3472b80919cbb9ae793f4 Author: Lu Baolu Date: Thu Aug 1 11:14:58 2019 +0800 I was using an internal branch based on drm-tip which didn't contain this commit yet. Fortunately it has been already merged into drm-tip over last weekend and has effectively fixed the issue. Thanks for testing this. Best regards, Lu Baolu ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [RFC PATCH] iommu/vt-d: Fix IOMMU field not populated on device hot re-plug
Hi, On 8/22/19 10:29 PM, Janusz Krzysztofik wrote: When a perfectly working i915 device is hot unplugged (via sysfs) and hot re-plugged again, its dev->archdata.iommu field is not populated again with an IOMMU pointer. As a result, the device probe fails on DMA mapping error during scratch page setup. It looks like that happens because devices are not detached from their MMUIO bus before they are removed on device unplug. Then, when an already registered device/IOMMU association is identified by the reinstantiated device's bus and function IDs on IOMMU bus re-attach attempt, the device's archdata is not populated with IOMMU information and the bad happens. I'm not sure if this is a proper fix but it works for me so at least it confirms correctness of my analysis results, I believe. So far I haven't been able to identify a good place where the possibly missing IOMMU bus detach on device unplug operation could be added. Which kernel version are you testing with? Does it contain below commit? commit 458b7c8e0dde12d140e3472b80919cbb9ae793f4 Author: Lu Baolu Date: Thu Aug 1 11:14:58 2019 +0800 iommu/vt-d: Detach domain when move device out of group When removing a device from an iommu group, the domain should be detached from the device. Otherwise, the stale domain info will still be cached by the driver and the driver will refuse to attach any domain to the device again. 
Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Fixes: b7297783c2bb6 ("iommu/vt-d: Remove duplicated code for device hotplug") Reported-and-tested-by: Vlad Buslov Suggested-by: Robin Murphy Link: https://lkml.org/lkml/2019/7/26/1133 Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel Best regards, Lu Baolu Signed-off-by: Janusz Krzysztofik --- drivers/iommu/intel-iommu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 12d094d08c0a..7cdcd0595408 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2477,6 +2477,9 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, if (info2) { found = info2->domain; info2->dev = dev; + + if (dev && !dev->archdata.iommu) + dev->archdata.iommu = info2; } }
Re: [Intel-gfx] IOMMU RMRR for Intel graphic device
Hi Daniel, On 5/10/19 4:34 PM, Daniel Vetter wrote: On Fri, May 10, 2019 at 01:21:59PM +0800, Lu Baolu wrote: Forward to i915 mailing list and post the question again so people knows what are the key concerns. The background is that Linux community is going to collect and report the reserved memory ranges of an assigned device to VFIO driver, and any mapped address will be checked against the reserved ranges. If there's any conflict, vifo will refuse the map request. Unfortunately, when it comes Intel graphic device, the conflict happens. That means address range mapped through vfio conflicts with the rmrr for graphic device. https://lkml.org/lkml/2018/6/5/760 The questions are 1) will the rmrr for graphic device still needs to be reserved for BIOS or firmware use, when a device is going to be assigned to user level? 2) if no, what's the check point, after which the rmrr is unnecessary anymore? The gfx RMRR isn't for the bios, it's for the driver. It covers stolen memory, and we need it. There's various piles of hacks to disable use of stolen, but they're all somewhat fragile, and afaiui for huc/guc we need it, and huc is part of the uapi exposed by the driver/device combo on gen9+. Until our hw folks come up with a better design I think we're just stuck on this, iow you can't pass-thru modern intel igfx because of this. -Daniel Thanks for the explanation. It's clear to me now. Best regards, Lu Baolu Best regards, Lu Baolu On 5/9/19 5:14 PM, Zhang, Tina wrote: Hi Baolu, +Xiong I think the root cause is that guest i915 needs to access RMRR. Xiong cooked a patch to disable the RMRR use in i915 guest, however that patch didn't get landed into the i915 upstream repo due to some concerns from i915 maintainers. Xiong can give us more backgrounds. So agreed, need to ask the i915 folk for this. 
BR, Tina -Original Message- From: Yuan, Hang Sent: Thursday, May 9, 2019 4:07 PM To: Lu Baolu ; Tian, Kevin ; Zhenyu Wang ; Zhang, Tina ; Lu, Baolu ; Liu, Yi L Subject: RE: IOMMU RMRR for Intel graphic device Hi Baolu, as Kevin suggested, would you like to ask i915 people in their mailing list intel-gfx@lists.freedesktop.org? Regards, Henry -Original Message- From: Lu Baolu [mailto:baolu...@linux.intel.com] Sent: Thursday, May 9, 2019 2:42 PM To: Tian, Kevin ; Zhenyu Wang ; Zhang, Tina ; Yuan, Hang ; Lu, Baolu ; Liu, Yi L Cc: baolu...@linux.intel.com Subject: Re: IOMMU RMRR for Intel graphic device Hi, +Tina and Henry and cc more people The background is that Linux community is going to collect and report the reserved memory ranges of an assigned device to VFIO driver, and any mapped address will be checked against the reserved ranges. If there's any conflict, vifo will refuse the map request. Unfortunately, when it comes Intel graphic device, the conflict happens. That means address range mapped through vfio conflicts with the rmrr for graphic device. https://lkml.org/lkml/2018/6/5/760 The questions are 1) will the rmrr for graphic device still needs to be reserved for BIOS or firmware use, when a device is going to be assigned to user level? 2) if no, what's the check point, after which the rmrr is unnecessary anymore? Best regards, Lu Baolu On 5/6/19 2:16 PM, Tian, Kevin wrote: this should better be asked to i915 guys, since it's not virtualization related. :-) One caveat, iirc, i915 driver tries to reuse stolen memory (covered by RMRR) even after boot time. take it as if another type of memory resource. If true I'm afraid this might be a gap to your proposal. Since nothing confidential, possibly you can directly discuss in community. From: Lu Baolu [mailto:baolu...@linux.intel.com] Sent: Thursday, May 2, 2019 2:45 PM Ping... Any comments? This has been postponed in the community for a long time. We need to response this as soon as possible. 
Best regards, Lu Baolu On 4/29/19 1:19 PM, Lu Baolu wrote: Hi Zhenyu, As we discussed, BIOS always exports IOMMU reserved memory regions for (a.k.a. RMRR in vt-d spec) Intel integrated graphic device. This caused some problems when we pass-through such graphic devices to user level. I am about to propose something to the community so that a RMRR for graphic devices could be explicitly canceled as long as the driver (i915 or vfio) knows that the RMRR will never be used by BIOS anymore. The same story happens for USB host controller devices. And since we know that BIOS will stop using that memory region as soon as the driver clears the SMI bits. So the question is, can graphic driver know when the RMRR for graphic could be canceled? Best regards, Lu Baolu ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] IOMMU RMRR for Intel graphic device
Forward to i915 mailing list and post the question again so people know what are the key concerns. The background is that Linux community is going to collect and report the reserved memory ranges of an assigned device to VFIO driver, and any mapped address will be checked against the reserved ranges. If there's any conflict, vfio will refuse the map request. Unfortunately, when it comes to the Intel graphic device, the conflict happens. That means address range mapped through vfio conflicts with the rmrr for graphic device. https://lkml.org/lkml/2018/6/5/760 The questions are 1) will the rmrr for graphic device still need to be reserved for BIOS or firmware use, when a device is going to be assigned to user level? 2) if no, what's the check point, after which the rmrr is unnecessary anymore? Best regards, Lu Baolu On 5/9/19 5:14 PM, Zhang, Tina wrote: Hi Baolu, +Xiong I think the root cause is that guest i915 needs to access RMRR. Xiong cooked a patch to disable the RMRR use in i915 guest, however that patch didn't get landed into the i915 upstream repo due to some concerns from i915 maintainers. Xiong can give us more backgrounds. So agreed, need to ask the i915 folk for this. BR, Tina -Original Message- From: Yuan, Hang Sent: Thursday, May 9, 2019 4:07 PM To: Lu Baolu ; Tian, Kevin ; Zhenyu Wang ; Zhang, Tina ; Lu, Baolu ; Liu, Yi L Subject: RE: IOMMU RMRR for Intel graphic device Hi Baolu, as Kevin suggested, would you like to ask i915 people in their mailing list intel-gfx@lists.freedesktop.org?
Regards, Henry -Original Message- From: Lu Baolu [mailto:baolu...@linux.intel.com] Sent: Thursday, May 9, 2019 2:42 PM To: Tian, Kevin ; Zhenyu Wang ; Zhang, Tina ; Yuan, Hang ; Lu, Baolu ; Liu, Yi L Cc: baolu...@linux.intel.com Subject: Re: IOMMU RMRR for Intel graphic device Hi, +Tina and Henry and cc more people The background is that Linux community is going to collect and report the reserved memory ranges of an assigned device to VFIO driver, and any mapped address will be checked against the reserved ranges. If there's any conflict, vifo will refuse the map request. Unfortunately, when it comes Intel graphic device, the conflict happens. That means address range mapped through vfio conflicts with the rmrr for graphic device. https://lkml.org/lkml/2018/6/5/760 The questions are 1) will the rmrr for graphic device still needs to be reserved for BIOS or firmware use, when a device is going to be assigned to user level? 2) if no, what's the check point, after which the rmrr is unnecessary anymore? Best regards, Lu Baolu On 5/6/19 2:16 PM, Tian, Kevin wrote: this should better be asked to i915 guys, since it's not virtualization related. :-) One caveat, iirc, i915 driver tries to reuse stolen memory (covered by RMRR) even after boot time. take it as if another type of memory resource. If true I'm afraid this might be a gap to your proposal. Since nothing confidential, possibly you can directly discuss in community. From: Lu Baolu [mailto:baolu...@linux.intel.com] Sent: Thursday, May 2, 2019 2:45 PM Ping... Any comments? This has been postponed in the community for a long time. We need to response this as soon as possible. Best regards, Lu Baolu On 4/29/19 1:19 PM, Lu Baolu wrote: Hi Zhenyu, As we discussed, BIOS always exports IOMMU reserved memory regions for (a.k.a. RMRR in vt-d spec) Intel integrated graphic device. This caused some problems when we pass-through such graphic devices to user level. 
I am about to propose something to the community so that a RMRR for graphic devices could be explicitly canceled as long as the driver (i915 or vfio) knows that the RMRR will never be used by BIOS anymore. The same story happens for USB host controller devices. And since we know that BIOS will stop using that memory region as soon as the driver clears the SMI bits. So the question is, can graphic driver know when the RMRR for graphic could be canceled? Best regards, Lu Baolu ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 1/1] Revert "iommu/vt-d: Clean up pasid quirk for pre-production devices"
Hi Joerg, The graphic guys are looking forward to having this in 4.18. Is it possible to take it in the following rcs? Best regards, Lu Baolu On 07/08/2018 02:23 PM, Lu Baolu wrote: > This reverts commit ab96746aaa344fb720a198245a837e266fad3b62. > > The commit ab96746aaa34 ("iommu/vt-d: Clean up pasid quirk for > pre-production devices") triggers ECS mode on some platforms > which have broken ECS support. As the result, graphic device > will be inoperable on boot. > > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107017 > > Cc: Ashok Raj > Signed-off-by: Lu Baolu > --- > drivers/iommu/intel-iommu.c | 32 ++-- > include/linux/intel-iommu.h | 1 + > 2 files changed, 31 insertions(+), 2 deletions(-) > > diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c > index b344a88..115ff26 100644 > --- a/drivers/iommu/intel-iommu.c > +++ b/drivers/iommu/intel-iommu.c > @@ -484,14 +484,37 @@ static int dmar_forcedac; > static int intel_iommu_strict; > static int intel_iommu_superpage = 1; > static int intel_iommu_ecs = 1; > +static int intel_iommu_pasid28; > static int iommu_identity_mapping; > > #define IDENTMAP_ALL 1 > #define IDENTMAP_GFX 2 > #define IDENTMAP_AZALIA 4 > > -#define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap)) > -#define pasid_enabled(iommu) (ecs_enabled(iommu) && ecap_pasid(iommu->ecap)) > +/* Broadwell and Skylake have broken ECS support — normal so-called "second > + * level" translation of DMA requests-without-PASID doesn't actually happen > + * unless you also set the NESTE bit in an extended context-entry. Which of > + * course means that SVM doesn't work because it's trying to do nested > + * translation of the physical addresses it finds in the process page tables, > + * through the IOVA->phys mapping found in the "second level" page tables. > + * > + * The VT-d specification was retroactively changed to change the definition > + * of the capability bits and pretend that Broadwell/Skylake never > happened... 
> + * but unfortunately the wrong bit was changed. It's ECS which is broken, but > + * for some reason it was the PASID capability bit which was redefined (from > + * bit 28 on BDW/SKL to bit 40 in future). > + * > + * So our test for ECS needs to eschew those implementations which set the > old > + * PASID capability bit 28, since those are the ones on which ECS is broken. > + * Unless we are working around the 'pasid28' limitations, that is, by > putting > + * the device into passthrough mode for normal DMA and thus masking the bug. > + */ > +#define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap) && \ > + (intel_iommu_pasid28 || > !ecap_broken_pasid(iommu->ecap))) > +/* PASID support is thus enabled if ECS is enabled and *either* of the old > + * or new capability bits are set. */ > +#define pasid_enabled(iommu) (ecs_enabled(iommu) && \ > + (ecap_pasid(iommu->ecap) || > ecap_broken_pasid(iommu->ecap))) > > int intel_iommu_gfx_mapped; > EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped); > @@ -554,6 +577,11 @@ static int __init intel_iommu_setup(char *str) > printk(KERN_INFO > "Intel-IOMMU: disable extended context table > support\n"); > intel_iommu_ecs = 0; > + } else if (!strncmp(str, "pasid28", 7)) { > + printk(KERN_INFO > + "Intel-IOMMU: enable pre-production PASID > support\n"); > + intel_iommu_pasid28 = 1; > + iommu_identity_mapping |= IDENTMAP_GFX; > } else if (!strncmp(str, "tboot_noforce", 13)) { > printk(KERN_INFO > "Intel-IOMMU: not forcing on after tboot. 
This > could expose security risk for tboot\n"); > diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h > index 1df9401..ef169d6 100644 > --- a/include/linux/intel-iommu.h > +++ b/include/linux/intel-iommu.h > @@ -121,6 +121,7 @@ > #define ecap_srs(e) ((e >> 31) & 0x1) > #define ecap_ers(e) ((e >> 30) & 0x1) > #define ecap_prs(e) ((e >> 29) & 0x1) > +#define ecap_broken_pasid(e) ((e >> 28) & 0x1) > #define ecap_dis(e) ((e >> 27) & 0x1) > #define ecap_nest(e) ((e >> 26) & 0x1) > #define ecap_mts(e) ((e >> 25) & 0x1) ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx