[PATCH 1/1] iommu/vt-d: Decouple igfx_off from graphic identity mapping

2024-04-27 Thread Lu Baolu
A kernel command-line option called igfx_off was introduced in commit
("Intel IOMMU: Intel IOMMU driver"). This option allows the user to
disable the IOMMU dedicated to SoC-integrated graphics devices.
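
For reference, the option is passed via the intel_iommu= parameter on the
kernel command line, for example:

    intel_iommu=igfx_off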

Commit <9452618e7462> ("iommu/intel: disable DMAR for g4x integrated gfx")
used this mechanism to disable the graphic-dedicated IOMMU for some
problematic devices. Later, more problematic graphic devices were added
to the list by commit <1f76249cc3beb> ("iommu/vt-d: Declare Broadwell igfx
dmar support snafu").

On the other hand, commit <19943b0e30b05> ("intel-iommu: Unify hardware
and software passthrough support") uses the identity domain for graphic
devices if CONFIG_DMAR_BROKEN_GFX_WA is selected.

+	if (iommu_pass_through)
+		iommu_identity_mapping = 1;
+#ifdef CONFIG_DMAR_BROKEN_GFX_WA
+	else
+		iommu_identity_mapping = 2;
+#endif
...

static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
{
+	if (iommu_identity_mapping == 2)
+		return IS_GFX_DEVICE(pdev);
...

As the driver evolved, CONFIG_DMAR_BROKEN_GFX_WA and quirk_iommu_igfx()
became mixed together, causing confusion in the driver's
device_def_domain_type callback. On one hand, dmar_map_gfx is used to turn
off the graphics-dedicated IOMMU as a workaround for some buggy hardware;
on the other hand, for those graphics devices, an IDENTITY mapping is
requested from the IOMMU core.

Commit <4b8d18c0c986> ("iommu/vt-d: Remove INTEL_IOMMU_BROKEN_GFX_WA")
removed the CONFIG_INTEL_IOMMU_BROKEN_GFX_WA option, so the IDENTITY_DOMAIN
requirement for graphics devices is no longer needed. Therefore, this
requirement can be removed from device_def_domain_type() and igfx_off can
be made independent.

Fixes: 4b8d18c0c986 ("iommu/vt-d: Remove INTEL_IOMMU_BROKEN_GFX_WA")
Signed-off-by: Lu Baolu 
---
 drivers/iommu/intel/iommu.c | 19 ++-
 1 file changed, 6 insertions(+), 13 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index fbbf8fda22f3..57a986524502 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -217,12 +217,11 @@ int intel_iommu_sm = IS_ENABLED(CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON);
 int intel_iommu_enabled = 0;
 EXPORT_SYMBOL_GPL(intel_iommu_enabled);
 
-static int dmar_map_gfx = 1;
 static int intel_iommu_superpage = 1;
 static int iommu_identity_mapping;
 static int iommu_skip_te_disable;
+static int disable_igfx_dedicated_iommu;
 
-#define IDENTMAP_GFX   2
 #define IDENTMAP_AZALIA	4
 
 const struct iommu_ops intel_iommu_ops;
@@ -261,7 +260,7 @@ static int __init intel_iommu_setup(char *str)
no_platform_optin = 1;
pr_info("IOMMU disabled\n");
} else if (!strncmp(str, "igfx_off", 8)) {
-   dmar_map_gfx = 0;
+   disable_igfx_dedicated_iommu = 1;
pr_info("Disable GFX device mapping\n");
} else if (!strncmp(str, "forcedac", 8)) {
		pr_warn("intel_iommu=forcedac deprecated; use iommu.forcedac instead\n");
@@ -2196,9 +2195,6 @@ static int device_def_domain_type(struct device *dev)
 
	if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
		return IOMMU_DOMAIN_IDENTITY;
-
-	if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
-		return IOMMU_DOMAIN_IDENTITY;
}
 
return 0;
@@ -2499,9 +2495,6 @@ static int __init init_dmars(void)
iommu_set_root_entry(iommu);
}
 
-   if (!dmar_map_gfx)
-   iommu_identity_mapping |= IDENTMAP_GFX;
-
check_tylersburg_isoch();
 
ret = si_domain_init(hw_pass_through);
@@ -2592,7 +2585,7 @@ static void __init init_no_remapping_devices(void)
/* This IOMMU has *only* gfx devices. Either bypass it or
   set the gfx_mapped flag, as appropriate */
drhd->gfx_dedicated = 1;
-   if (!dmar_map_gfx)
+   if (disable_igfx_dedicated_iommu)
drhd->ignored = 1;
}
 }
@@ -4621,7 +4614,7 @@ static void quirk_iommu_igfx(struct pci_dev *dev)
return;
 
pci_info(dev, "Disabling IOMMU for graphics on this chipset\n");
-   dmar_map_gfx = 0;
+   disable_igfx_dedicated_iommu = 1;
 }
 
 /* G4x/GM45 integrated gfx dmar support is totally busted. */
@@ -4702,8 +4695,8 @@ static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
 
if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
		pci_info(dev, "BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
-   dmar_map_gfx = 0;
-   } else if (dmar_

[PATCH v2 1/1] iommu/vt-d: Remove INTEL_IOMMU_BROKEN_GFX_WA

2024-01-29 Thread Lu Baolu
Commit 62edf5dc4a524 ("intel-iommu: Restore DMAR_BROKEN_GFX_WA option for
broken graphics drivers") was introduced 15 years ago as a temporary
workaround for graphics drivers that used physical addresses for DMA and
avoided the DMA APIs. This workaround was disabled by default.

As 15 years have passed, graphics driver developers are expected to have
migrated their drivers to the kernel DMA APIs. Therefore, this workaround
is no longer required and can be removed.

The Intel IOMMU driver also provides an "igfx_off" option to turn off
DMA translation for the graphics-dedicated IOMMU. Hence, there is
really no good reason to keep this config option.

Suggested-by: Kevin Tian 
Signed-off-by: Lu Baolu 
Reviewed-by: Kevin Tian 
---
 drivers/iommu/intel/iommu.c |  4 
 drivers/iommu/intel/Kconfig | 11 ---
 2 files changed, 15 deletions(-)

Change log:
v2:
 - Add igfx_off option to commit message and Cc Intel graphic
   maintainers.

v1: 
https://lore.kernel.org/linux-iommu/20240127064512.16744-1-baolu...@linux.intel.com/

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 6fb5f6fceea1..fc52fcd786aa 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -2660,10 +2660,6 @@ static int __init init_dmars(void)
iommu_set_root_entry(iommu);
}
 
-#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
-   dmar_map_gfx = 0;
-#endif
-
if (!dmar_map_gfx)
iommu_identity_mapping |= IDENTMAP_GFX;
 
diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig
index 012cd2541a68..d2d34eb28d94 100644
--- a/drivers/iommu/intel/Kconfig
+++ b/drivers/iommu/intel/Kconfig
@@ -64,17 +64,6 @@ config INTEL_IOMMU_DEFAULT_ON
  one is found. If this option is not selected, DMAR support can
  be enabled by passing intel_iommu=on to the kernel.
 
-config INTEL_IOMMU_BROKEN_GFX_WA
-   bool "Workaround broken graphics drivers (going away soon)"
-   depends on BROKEN && X86
-   help
- Current Graphics drivers tend to use physical address
- for DMA and avoid using DMA APIs. Setting this config
- option permits the IOMMU driver to set a unity map for
- all the OS-visible memory. Hence the driver can continue
- to use physical addresses for DMA, at least until this
- option is removed in the 2.6.32 kernel.
-
 config INTEL_IOMMU_FLOPPY_WA
def_bool y
depends on X86
-- 
2.34.1



Re: [Intel-gfx] [PATCH] drm/i915: Use per device iommu check

2021-11-26 Thread Lu Baolu

On 2021/11/25 19:47, Robin Murphy wrote:

On 2021-11-25 10:42, Tvrtko Ursulin wrote:

From: Tvrtko Ursulin 

With both integrated and discrete Intel GPUs in a system, the current
global check of intel_iommu_gfx_mapped, as done from intel_vtd_active()
may not be completely accurate.

In this patch we add i915 parameter to intel_vtd_active() in order to
prepare it for multiple GPUs and we also change the check away from Intel
specific intel_iommu_gfx_mapped (global exported by the Intel IOMMU
driver) to probing the presence of IOMMU domain on a specific device
using iommu_get_domain_for_dev().


FWIW the way you have it now is functionally equivalent to using 
device_iommu_mapped(), which I think might be slightly clearer for the 
current intent, but I don't have a significantly strong preference 
(after all, this *was* the de-facto way of checking before 
device_iommu_mapped() was introduced, and there are still other examples 
of it around). So from the IOMMU perspective,


Acked-by: Robin Murphy 

Perhaps the AGP driver could also be tweaked and intel_iommu_gfx_mapped 
cleaned away entirely, but I'll leave that for Baolu to think about :)


I fully agree with Robin.

I prefer device_iommu_mapped() over iommu_get_domain_for_dev().

"iommu_get_domain_for_dev(dev) == NULL" simply means that the device
does not have any domain attached. At present that happens to be equivalent
to the device DMA-ing without IOMMU translation, but I'm sure it will change
in the future.
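
For illustration, a minimal sketch of what the helper could look like with
the call swapped to device_iommu_mapped() (same shape as the posted patch,
assuming <linux/iommu.h> is available; not the final code):

static inline bool intel_vtd_active(struct drm_i915_private *i915)
{
	/* An attached IOMMU group implies the device is IOMMU-mapped. */
	if (device_iommu_mapped(i915->drm.dev))
		return true;

	/* Running as a guest, we assume the host is enforcing VT'd */
	return run_as_guest();
}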

With device_iommu_mapped() replaced,

Reviewed-by: Lu Baolu 

Best regards,
baolu


Re: [Intel-gfx] [PATCH] drm/i915: Use per device iommu check

2021-11-11 Thread Lu Baolu

On 11/11/21 11:18 PM, Tvrtko Ursulin wrote:


On 10/11/2021 14:37, Robin Murphy wrote:

On 2021-11-10 14:11, Tvrtko Ursulin wrote:


On 10/11/2021 12:35, Lu Baolu wrote:

On 2021/11/10 20:08, Tvrtko Ursulin wrote:


On 10/11/2021 12:04, Lu Baolu wrote:

On 2021/11/10 17:30, Tvrtko Ursulin wrote:


On 10/11/2021 07:12, Lu Baolu wrote:

Hi Tvrtko,

On 2021/11/9 20:17, Tvrtko Ursulin wrote:

From: Tvrtko Ursulin

On igfx + dgfx setups, it appears that intel_iommu=igfx_off 
option only
disables the igfx iommu. Stop relying on global 
intel_iommu_gfx_mapped
and probe presence of iommu domain per device to accurately 
reflect its

status.

Signed-off-by: Tvrtko Ursulin
Cc: Lu Baolu
---
Baolu, is my understanding here correct? Maybe I am confused by 
both
intel_iommu_gfx_mapped and dmar_map_gfx being globals in the 
intel_iommu
driver. But it certainly appears the setup can assign some 
iommu ops (and
assign the discrete i915 to iommu group) when those two are set 
to off.


diff --git a/drivers/gpu/drm/i915/i915_drv.h 
b/drivers/gpu/drm/i915/i915_drv.h

index e967cd08f23e..9fb38a54f1fe 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1763,26 +1763,27 @@ static inline bool run_as_guest(void)
  #define HAS_D12_PLANE_MINIMIZATION(dev_priv) 
(IS_ROCKETLAKE(dev_priv) || \

    IS_ALDERLAKE_S(dev_priv))

-static inline bool intel_vtd_active(void)
+static inline bool intel_vtd_active(struct drm_i915_private *i915)
  {
-#ifdef CONFIG_INTEL_IOMMU
-    if (intel_iommu_gfx_mapped)
+    if (iommu_get_domain_for_dev(i915->drm.dev))
  return true;
-#endif

  /* Running as a guest, we assume the host is enforcing 
VT'd */

  return run_as_guest();
  }

Have you verified this change? I am afraid that
iommu_get_domain_for_dev() always gets a valid iommu domain even
intel_iommu_gfx_mapped == 0.


Yes it seems to work as is:

default:

# grep -i iommu /sys/kernel/debug/dri/*/i915_capabilities
/sys/kernel/debug/dri/0/i915_capabilities:iommu: enabled
/sys/kernel/debug/dri/1/i915_capabilities:iommu: enabled

intel_iommu=igfx_off:

# grep -i iommu /sys/kernel/debug/dri/*/i915_capabilities
/sys/kernel/debug/dri/0/i915_capabilities:iommu: disabled
/sys/kernel/debug/dri/1/i915_capabilities:iommu: enabled

On my system dri device 0 is integrated graphics and 1 is discrete.


The drm device 0 has a dedicated iommu. When the user request igfx 
not
mapped, the VT-d implementation will turn it off to save power. 
But for

shared iommu, you definitely will get it enabled.


Sorry I am not following, what exactly do you mean? Is there a 
platform with integrated graphics without a dedicated iommu, in 
which case intel_iommu=igfx_off results in intel_iommu_gfx_mapped 
== 0 and iommu_get_domain_for_dev returning non-NULL?


Your code always work for an igfx with a dedicated iommu. This might be
always true on today's platforms. But from driver's point of view, we
should not make such assumption.

For example, if the iommu implementation decides not to turn off the
graphic iommu (perhaps due to some hw quirk or for graphic
virtualization), your code will be broken.


If I got it right, this would go back to your earlier recommendation 
to have the check look like this:


static bool intel_vtd_active(struct drm_i915_private *i915)
{
 struct iommu_domain *domain;

 domain = iommu_get_domain_for_dev(i915->drm.dev);
 if (domain && (domain->type & __IOMMU_DOMAIN_PAGING))
 return true;
 ...

This would be okay as a first step?

Elsewhere in the thread Robin suggested looking at the dev->dma_ops
and comparing against iommu_dma_ops. Would these two solutions be
effectively the same?


Effectively, yes. See iommu_setup_dma_ops() - the only way to end up 
with iommu_dma_ops is if a managed translation domain is present; if 
the IOMMU is present but the default domain type has been set to 
passthrough (either globally or forced for the given device) it will 
do nothing and leave you with dma-direct, while if the IOMMU has been 
ignored entirely then it should never even be called. Thus it neatly 
encapsulates what you're after here.


One concern I have is whether the pass-through mode truly does nothing 
or addresses perhaps still go through the dmar hardware just with no 
translation?


Pass-through mode means the latter.



If latter then most like for like change is actually exactly what the 
first version of my patch did. That is replace intel_iommu_gfx_mapped 
with a plain non-NULL check on iommu_get_domain_for_dev.


It depends on what you want here (a rough sketch distinguishing the two
cases follows below):

#1) the graphics device works in iommu pass-through mode
   - the device has an iommu
   - but the iommu does no translation
   - the dma transactions go through the iommu with the same destination
     memory address specified by the device;

#2) the graphics device works without a system iommu
   - the iommu is off
   - there's no iommu on the path of the DMA transaction.
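
A rough, illustrative sketch of how a driver could tell these apart (the
helper names are made up, and this reuses the __IOMMU_DOMAIN_PAGING idea
discussed above rather than an existing dedicated API):

#include <linux/iommu.h>

/* True for case #1 and for a translating iommu: an iommu sits on the DMA path. */
static bool gfx_behind_any_iommu(struct device *dev)
{
	return iommu_get_domain_for_dev(dev) != NULL;
}

/* True only when the iommu actually translates, i.e. neither case #1 nor #2. */
static bool gfx_dma_is_translated(struct device *dev)
{
	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);

	return domain && (domain->type & __IOMMU_DOMAIN_PAGING);
}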

My

Re: [Intel-gfx] [PATCH] drm/i915: Use per device iommu check

2021-11-11 Thread Lu Baolu

On 11/11/21 11:06 PM, Tvrtko Ursulin wrote:


On 10/11/2021 12:35, Lu Baolu wrote:

On 2021/11/10 20:08, Tvrtko Ursulin wrote:


On 10/11/2021 12:04, Lu Baolu wrote:

On 2021/11/10 17:30, Tvrtko Ursulin wrote:


On 10/11/2021 07:12, Lu Baolu wrote:

Hi Tvrtko,

On 2021/11/9 20:17, Tvrtko Ursulin wrote:

From: Tvrtko Ursulin

On igfx + dgfx setups, it appears that intel_iommu=igfx_off 
option only
disables the igfx iommu. Stop relying on global 
intel_iommu_gfx_mapped
and probe presence of iommu domain per device to accurately 
reflect its

status.

Signed-off-by: Tvrtko Ursulin
Cc: Lu Baolu
---
Baolu, is my understanding here correct? Maybe I am confused by both
intel_iommu_gfx_mapped and dmar_map_gfx being globals in the 
intel_iommu
driver. But it certainly appears the setup can assign some iommu 
ops (and
assign the discrete i915 to iommu group) when those two are set 
to off.


diff --git a/drivers/gpu/drm/i915/i915_drv.h 
b/drivers/gpu/drm/i915/i915_drv.h

index e967cd08f23e..9fb38a54f1fe 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1763,26 +1763,27 @@ static inline bool run_as_guest(void)
  #define HAS_D12_PLANE_MINIMIZATION(dev_priv) 
(IS_ROCKETLAKE(dev_priv) || \

    IS_ALDERLAKE_S(dev_priv))

-static inline bool intel_vtd_active(void)
+static inline bool intel_vtd_active(struct drm_i915_private *i915)
  {
-#ifdef CONFIG_INTEL_IOMMU
-    if (intel_iommu_gfx_mapped)
+    if (iommu_get_domain_for_dev(i915->drm.dev))
  return true;
-#endif

  /* Running as a guest, we assume the host is enforcing VT'd */
  return run_as_guest();
  }

Have you verified this change? I am afraid that
iommu_get_domain_for_dev() always gets a valid iommu domain even
intel_iommu_gfx_mapped == 0.


Yes it seems to work as is:

default:

# grep -i iommu /sys/kernel/debug/dri/*/i915_capabilities
/sys/kernel/debug/dri/0/i915_capabilities:iommu: enabled
/sys/kernel/debug/dri/1/i915_capabilities:iommu: enabled

intel_iommu=igfx_off:

# grep -i iommu /sys/kernel/debug/dri/*/i915_capabilities
/sys/kernel/debug/dri/0/i915_capabilities:iommu: disabled
/sys/kernel/debug/dri/1/i915_capabilities:iommu: enabled

On my system dri device 0 is integrated graphics and 1 is discrete.


The drm device 0 has a dedicated iommu. When the user request igfx not
mapped, the VT-d implementation will turn it off to save power. But for
shared iommu, you definitely will get it enabled.


Sorry I am not following, what exactly do you mean? Is there a 
platform with integrated graphics without a dedicated iommu, in which 
case intel_iommu=igfx_off results in intel_iommu_gfx_mapped == 0 and 
iommu_get_domain_for_dev returning non-NULL?


Your code always work for an igfx with a dedicated iommu. This might be
always true on today's platforms. But from driver's point of view, we
should not make such assumption.

For example, if the iommu implementation decides not to turn off the
graphic iommu (perhaps due to some hw quirk or for graphic
virtualization), your code will be broken.


I tried your suggestion (checking for __IOMMU_DOMAIN_PAGING) and it 
works better, however I have observed one odd behaviour (for me at least).


In short - why does the DMAR mode for the discrete device change 
depending on igfx_off parameter?


Consider the laptop has these two graphics cards:

# cat /sys/kernel/debug/dri/0/name
i915 dev=:00:02.0 unique=:00:02.0 # integrated

# cat /sys/kernel/debug/dri/1/name
i915 dev=:03:00.0 unique=:03:00.0 # discrete

Booting with different options:
===

default / intel_iommu=on


# cat /sys/class/iommu/dmar0/devices/:00:02.0/iommu_group/type
DMA-FQ
# cat /sys/class/iommu/dmar2/devices/:03:00.0/iommu_group/type
DMA-FQ

# grep -i iommu /sys/kernel/debug/dri/*/i915_capabilities
/sys/kernel/debug/dri/0/i915_capabilities:iommu: enabled
/sys/kernel/debug/dri/1/i915_capabilities:iommu: enabled

All good.

intel_iommu=igfx_off


## no dmar0 in sysfs
# cat /sys/class/iommu/dmar2/devices/:03:00.0/iommu_group/type
identity

Unexpected!?

# grep -i iommu /sys/kernel/debug/dri/*/i915_capabilities
/sys/kernel/debug/dri/0/i915_capabilities:iommu: disabled
/sys/kernel/debug/dri/1/i915_capabilities:iommu: disabled # At least the 
i915 patch detects it correctly.


intel_iommu=off
---

## no dmar0 in sysfs
## no dmar2 in sysfs

# grep -i iommu /sys/kernel/debug/dri/*/i915_capabilities
/sys/kernel/debug/dri/0/i915_capabilities:iommu: disabled
/sys/kernel/debug/dri/1/i915_capabilities:iommu: disabled

All good.

The fact discrete graphics changes from translated to pass-through when 
igfx_off is set is surprising to me. Is this a bug?


The existing VT-d implementation doesn't distinguish igfx from dgfx. It
only checks whether the device is of a display class:

#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
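
A hypothetical refinement (illustrative only, not in-tree code) that would
limit the check to integrated graphics, i.e. an Intel display device on the
root bus, could look like:

#include <linux/pci.h>

static bool is_integrated_gfx(struct pci_dev *pdev)
{
	return (pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY &&
	       pdev->vendor == PCI_VENDOR_ID_INTEL &&
	       pci_is_root_bus(pdev->bus); /* discrete cards sit behind a bridge */
}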

When igfx_off 

Re: [Intel-gfx] [PATCH] drm/i915: Use per device iommu check

2021-11-10 Thread Lu Baolu

On 2021/11/10 20:08, Tvrtko Ursulin wrote:


On 10/11/2021 12:04, Lu Baolu wrote:

On 2021/11/10 17:30, Tvrtko Ursulin wrote:


On 10/11/2021 07:12, Lu Baolu wrote:

Hi Tvrtko,

On 2021/11/9 20:17, Tvrtko Ursulin wrote:

From: Tvrtko Ursulin

On igfx + dgfx setups, it appears that intel_iommu=igfx_off option 
only

disables the igfx iommu. Stop relying on global intel_iommu_gfx_mapped
and probe presence of iommu domain per device to accurately reflect 
its

status.

Signed-off-by: Tvrtko Ursulin
Cc: Lu Baolu
---
Baolu, is my understanding here correct? Maybe I am confused by both
intel_iommu_gfx_mapped and dmar_map_gfx being globals in the 
intel_iommu
driver. But it certainly appears the setup can assign some iommu 
ops (and
assign the discrete i915 to iommu group) when those two are set to 
off.


diff --git a/drivers/gpu/drm/i915/i915_drv.h 
b/drivers/gpu/drm/i915/i915_drv.h

index e967cd08f23e..9fb38a54f1fe 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1763,26 +1763,27 @@ static inline bool run_as_guest(void)
  #define HAS_D12_PLANE_MINIMIZATION(dev_priv) 
(IS_ROCKETLAKE(dev_priv) || \

    IS_ALDERLAKE_S(dev_priv))

-static inline bool intel_vtd_active(void)
+static inline bool intel_vtd_active(struct drm_i915_private *i915)
  {
-#ifdef CONFIG_INTEL_IOMMU
-    if (intel_iommu_gfx_mapped)
+    if (iommu_get_domain_for_dev(i915->drm.dev))
  return true;
-#endif

  /* Running as a guest, we assume the host is enforcing VT'd */
  return run_as_guest();
  }

Have you verified this change? I am afraid that
iommu_get_domain_for_dev() always gets a valid iommu domain even
intel_iommu_gfx_mapped == 0.


Yes it seems to work as is:

default:

# grep -i iommu /sys/kernel/debug/dri/*/i915_capabilities
/sys/kernel/debug/dri/0/i915_capabilities:iommu: enabled
/sys/kernel/debug/dri/1/i915_capabilities:iommu: enabled

intel_iommu=igfx_off:

# grep -i iommu /sys/kernel/debug/dri/*/i915_capabilities
/sys/kernel/debug/dri/0/i915_capabilities:iommu: disabled
/sys/kernel/debug/dri/1/i915_capabilities:iommu: enabled

On my system dri device 0 is integrated graphics and 1 is discrete.


The drm device 0 has a dedicated iommu. When the user request igfx not
mapped, the VT-d implementation will turn it off to save power. But for
shared iommu, you definitely will get it enabled.


Sorry I am not following, what exactly do you mean? Is there a platform 
with integrated graphics without a dedicated iommu, in which case 
intel_iommu=igfx_off results in intel_iommu_gfx_mapped == 0 and 
iommu_get_domain_for_dev returning non-NULL?


Your code always works for an igfx with a dedicated iommu. That might
always be true on today's platforms, but from the driver's point of view
we should not make such an assumption.

For example, if the iommu implementation decides not to turn off the
graphics iommu (perhaps due to some hw quirk or for graphics
virtualization), your code will be broken.

Best regards,
baolu


Re: [Intel-gfx] [PATCH] drm/i915: Use per device iommu check

2021-11-10 Thread Lu Baolu

On 2021/11/10 17:35, Tvrtko Ursulin wrote:


On 10/11/2021 07:25, Lu Baolu wrote:

On 2021/11/10 1:35, Tvrtko Ursulin wrote:


On 09/11/2021 17:19, Lucas De Marchi wrote:

On Tue, Nov 09, 2021 at 12:17:59PM +, Tvrtko Ursulin wrote:

From: Tvrtko Ursulin 

On igfx + dgfx setups, it appears that intel_iommu=igfx_off option 
only

disables the igfx iommu. Stop relying on global intel_iommu_gfx_mapped
and probe presence of iommu domain per device to accurately reflect 
its

status.


nice, I was just starting to look into this but for another reason: we
are adding support for other archs, like aarch64, and the global from
here was a problem


Yes I realized the other iommu angle as well. To do this properly we 
need to sort the intel_vtd_active call sites into at least two 
buckets - which are truly about VT-d and which are just IOMMU.


For instance the THP decision in i915_gemfs.c would be "are we 
behind any iommu". Some other call sites are possibly only about the 
bugs in the igfx iommu. Not sure if there is a third bucket for any 
potential differences between igfx iommu and other Intel iommu in 
case of dgfx.


I'd like to hear from Baolu as well to confirm if intel_iommu driver 
is handling igfx + dgfx correctly in respect to the two global 
variables I mention in the commit message.


I strongly agree that the drivers should call the IOMMU interface
directly for portability. For Intel graphic driver, we have two issues:

#1) driver asks vt-d driver for identity map with intel_iommu=igfx_off.
#2) driver query the status with a global intel_iommu_gfx_mapped.

We need to solve these two problems step by step. This patch is
definitely a good start point.


(I should have really consolidated the thread, but never mind now.)

You mean good starting point for the discussion or between your first 
and second email you started thinking it may even work?


Because as I wrote in the other email, it appears to work. But I fully 
accept it may be by accident and you may suggest a proper API to be 
added to the IOMMU core, which I would then be happy to use.


If maybe not immediately, perhaps we could start with this patch and 
going forward add something more detailed. Like for instance allowing us 
to query the name/id of the iommu driver in case i915 needs to apply 
different quirks across them? Not sure how feasible that would be, but 
at the moment the need does sound plausible to me.


The whole story in my mind looks like this:

1. The graphics device driver has its own way to let the user specify
   iommu-bypass mode. For example, early_param()...

2. If the iommu-bypass mode is set, the graphics device gets the following
   set:

	dev->dma_ops_bypass = true;

3. The iommu probe procedure will use the identity domain (non-mapped mode)
   for the device because dev->dma_ops_bypass is true.

That's all. The device driver doesn't need to check the iommu for the
mapping mode. And this framework would be generic and could be used by
other device drivers.
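
A rough sketch of steps 1 and 2 above, assuming such a knob existed in the
graphics driver (the parameter name and helpers are hypothetical, and the
dev->dma_ops_bypass field only exists with CONFIG_DMA_OPS_BYPASS):

#include <linux/device.h>
#include <linux/init.h>

static bool gfx_iommu_bypass; /* step 1: hypothetical user-visible knob */

static int __init gfx_iommu_bypass_setup(char *str)
{
	gfx_iommu_bypass = true;
	return 0;
}
early_param("gfx_iommu_bypass", gfx_iommu_bypass_setup);

/* Step 2: would have to run before/during device probe (the open problem below). */
static void gfx_request_iommu_bypass(struct device *dev)
{
	if (gfx_iommu_bypass)
		dev->dma_ops_bypass = true;
}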

The difficulty in doing so is that step 2 must happen before or during
the device probe process. At present, I haven't figured out how to do
it yet. :-)

Best regards,
baolu


Re: [Intel-gfx] [PATCH] drm/i915: Use per device iommu check

2021-11-10 Thread Lu Baolu

On 2021/11/10 17:30, Tvrtko Ursulin wrote:


On 10/11/2021 07:12, Lu Baolu wrote:

Hi Tvrtko,

On 2021/11/9 20:17, Tvrtko Ursulin wrote:

From: Tvrtko Ursulin

On igfx + dgfx setups, it appears that intel_iommu=igfx_off option only
disables the igfx iommu. Stop relying on global intel_iommu_gfx_mapped
and probe presence of iommu domain per device to accurately reflect its
status.

Signed-off-by: Tvrtko Ursulin
Cc: Lu Baolu
---
Baolu, is my understanding here correct? Maybe I am confused by both
intel_iommu_gfx_mapped and dmar_map_gfx being globals in the intel_iommu
driver. But it certainly appears the setup can assign some iommu ops 
(and

assign the discrete i915 to iommu group) when those two are set to off.


diff --git a/drivers/gpu/drm/i915/i915_drv.h 
b/drivers/gpu/drm/i915/i915_drv.h

index e967cd08f23e..9fb38a54f1fe 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1763,26 +1763,27 @@ static inline bool run_as_guest(void)
  #define HAS_D12_PLANE_MINIMIZATION(dev_priv) 
(IS_ROCKETLAKE(dev_priv) || \

    IS_ALDERLAKE_S(dev_priv))

-static inline bool intel_vtd_active(void)
+static inline bool intel_vtd_active(struct drm_i915_private *i915)
  {
-#ifdef CONFIG_INTEL_IOMMU
-    if (intel_iommu_gfx_mapped)
+    if (iommu_get_domain_for_dev(i915->drm.dev))
  return true;
-#endif

  /* Running as a guest, we assume the host is enforcing VT'd */
  return run_as_guest();
  }

Have you verified this change? I am afraid that
iommu_get_domain_for_dev() always gets a valid iommu domain even
intel_iommu_gfx_mapped == 0.


Yes it seems to work as is:

default:

# grep -i iommu /sys/kernel/debug/dri/*/i915_capabilities
/sys/kernel/debug/dri/0/i915_capabilities:iommu: enabled
/sys/kernel/debug/dri/1/i915_capabilities:iommu: enabled

intel_iommu=igfx_off:

# grep -i iommu /sys/kernel/debug/dri/*/i915_capabilities
/sys/kernel/debug/dri/0/i915_capabilities:iommu: disabled
/sys/kernel/debug/dri/1/i915_capabilities:iommu: enabled

On my system dri device 0 is integrated graphics and 1 is discrete.


The drm device 0 has a dedicated iommu. When the user requests that igfx
not be mapped, the VT-d implementation will turn it off to save power. But
for a shared iommu, you will definitely get it enabled.

Best regards,
baolu


Re: [Intel-gfx] [PATCH] drm/i915: Use per device iommu check

2021-11-09 Thread Lu Baolu

On 2021/11/10 1:35, Tvrtko Ursulin wrote:


On 09/11/2021 17:19, Lucas De Marchi wrote:

On Tue, Nov 09, 2021 at 12:17:59PM +, Tvrtko Ursulin wrote:

From: Tvrtko Ursulin 

On igfx + dgfx setups, it appears that intel_iommu=igfx_off option only
disables the igfx iommu. Stop relying on global intel_iommu_gfx_mapped
and probe presence of iommu domain per device to accurately reflect its
status.


nice, I was just starting to look into this but for another reason: we
are adding support for other archs, like aarch64, and the global from
here was a problem


Yes I realized the other iommu angle as well. To do this properly we 
need to sort the intel_vtd_active call sites into at least two buckets - 
which are truly about VT-d and which are just IOMMU.


For instance the THP decision in i915_gemfs.c would be "are we behind 
any iommu". Some other call sites are possibly only about the bugs in 
the igfx iommu. Not sure if there is a third bucket for any potential 
differences between igfx iommu and other Intel iommu in case of dgfx.


I'd like to hear from Baolu as well to confirm if intel_iommu driver is 
handling igfx + dgfx correctly in respect to the two global variables I 
mention in the commit message.


I strongly agree that the drivers should call the IOMMU interface
directly for portability. For the Intel graphics driver, we have two issues:

#1) the driver asks the vt-d driver for an identity map with intel_iommu=igfx_off.
#2) the driver queries the status with a global intel_iommu_gfx_mapped.

We need to solve these two problems step by step. This patch is
definitely a good starting point.

Best regards,
baolu


Re: [Intel-gfx] [PATCH] drm/i915: Use per device iommu check

2021-11-09 Thread Lu Baolu

Hi Tvrtko,

On 2021/11/9 20:17, Tvrtko Ursulin wrote:

From: Tvrtko Ursulin

On igfx + dgfx setups, it appears that intel_iommu=igfx_off option only
disables the igfx iommu. Stop relying on global intel_iommu_gfx_mapped
and probe presence of iommu domain per device to accurately reflect its
status.

Signed-off-by: Tvrtko Ursulin
Cc: Lu Baolu
---
Baolu, is my understanding here correct? Maybe I am confused by both
intel_iommu_gfx_mapped and dmar_map_gfx being globals in the intel_iommu
driver. But it certainly appears the setup can assign some iommu ops (and
assign the discrete i915 to iommu group) when those two are set to off.


diff --git a/drivers/gpu/drm/i915/i915_drv.h 
b/drivers/gpu/drm/i915/i915_drv.h

index e967cd08f23e..9fb38a54f1fe 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1763,26 +1763,27 @@ static inline bool run_as_guest(void)
 #define HAS_D12_PLANE_MINIMIZATION(dev_priv) (IS_ROCKETLAKE(dev_priv) || \
  IS_ALDERLAKE_S(dev_priv))

-static inline bool intel_vtd_active(void)
+static inline bool intel_vtd_active(struct drm_i915_private *i915)
 {
-#ifdef CONFIG_INTEL_IOMMU
-   if (intel_iommu_gfx_mapped)
+   if (iommu_get_domain_for_dev(i915->drm.dev))
return true;
-#endif

/* Running as a guest, we assume the host is enforcing VT'd */
return run_as_guest();
 }

Have you verified this change? I am afraid that
iommu_get_domain_for_dev() always gets a valid iommu domain even when
intel_iommu_gfx_mapped == 0.

A possible way could look like this:

static bool intel_vtd_active(struct drm_i915_private *i915)
{
struct iommu_domain *domain;

domain = iommu_get_domain_for_dev(i915->drm.dev);

if (domain && (domain->type & __IOMMU_DOMAIN_PAGING))
return true;

... ...
}

Actually I don't like this either, since it checks domain->type outside
of the iommu subsystem. We could refactor this later by exporting an iommu
interface for this check.

Best regards,
baolu


Re: [Intel-gfx] [PATCH 1/4] iommu/vt-d: Disable superpage for Geminilake igfx

2021-07-13 Thread Lu Baolu

On 7/14/21 4:30 AM, Ville Syrjälä wrote:

On Tue, Jul 13, 2021 at 09:34:09AM +0800, Lu Baolu wrote:

On 7/12/21 11:47 PM, Ville Syrjälä wrote:

On Mon, Jul 12, 2021 at 07:23:07AM +0800, Lu Baolu wrote:

On 7/10/21 12:47 AM, Ville Syrjala wrote:

From: Ville Syrjälä

While running "gem_exec_big --r single" from igt-gpu-tools on
Geminilake as soon as a 2M mapping is made I tend to get a DMAR
write fault. Strangely the faulting address is always a 4K page
and usually very far away from the 2M page that got mapped.
But if no 2M mappings get used I can't reproduce the fault.

I also tried to dump the PTE for the faulting address but it actually
looks correct to me (ie. definitely seems to have the write bit set):
DMAR: DRHD: handling fault status reg 2
DMAR: [DMA Write] Request device [00:02.0] PASID  fault addr 
7fa8a78000 [fault reason 05] PTE Write access is not set
DMAR: fault 7fa8a78000 (level=1) PTE = 149efc003

So not really sure what's going on and this might just be full on duct
tape, but it seems to work here. The machine has now survived a whole day
running that test whereas with superpage enabled it fails in less than
a minute usually.

TODO: might be nice to disable superpage only for the igfx iommu
 instead of both iommus

If all these quirks are about igfx dedicated iommu's, I would suggest to
disable superpage only for the igfx ones.

Sure. Unfortunately there's no convenient mechanism to do that in
the iommu driver that I can immediately see. So not something I
can just whip up easily. Since you're actually familiar with the
driver maybe you can come up with a decent solution for that?


How about something like below? [no compile, no test...]

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 1131b8efb050..2d51ef288a9e 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -338,6 +338,7 @@ static int intel_iommu_strict;
   static int intel_iommu_superpage = 1;
   static int iommu_identity_mapping;
   static int iommu_skip_te_disable;
+static int iommu_skip_igfx_superpage;

   #define IDENTMAP_GFX 2
   #define IDENTMAP_AZALIA  4
@@ -652,6 +653,27 @@ static bool domain_update_iommu_snooping(struct intel_iommu *skip)
return ret;
   }

+static bool domain_use_super_page(struct dmar_domain *domain)
+{
+   struct dmar_drhd_unit *drhd;
+   struct intel_iommu *iommu;
+   bool ret = true;
+
+   if (!intel_iommu_superpage)
+   return false;
+
+   rcu_read_lock();
+   for_each_active_iommu(iommu, drhd) {
+   if (drhd->gfx_dedicated && iommu_skip_igfx_superpage) {
+   ret = false;
+   break

  ^
Missing semicolon. Otherwise seems to work great here. Thanks.

Are you going to turn this into a proper patch, or do you
want me to just squash this into my patches and repost?



Please go ahead with a new version.

Best regards,
baolu
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 1/4] iommu/vt-d: Disable superpage for Geminilake igfx

2021-07-12 Thread Lu Baolu

On 7/12/21 11:47 PM, Ville Syrjälä wrote:

On Mon, Jul 12, 2021 at 07:23:07AM +0800, Lu Baolu wrote:

On 7/10/21 12:47 AM, Ville Syrjala wrote:

From: Ville Syrjälä

While running "gem_exec_big --r single" from igt-gpu-tools on
Geminilake as soon as a 2M mapping is made I tend to get a DMAR
write fault. Strangely the faulting address is always a 4K page
and usually very far away from the 2M page that got mapped.
But if no 2M mappings get used I can't reproduce the fault.

I also tried to dump the PTE for the faulting address but it actually
looks correct to me (ie. definitely seems to have the write bit set):
   DMAR: DRHD: handling fault status reg 2
   DMAR: [DMA Write] Request device [00:02.0] PASID  fault addr 
7fa8a78000 [fault reason 05] PTE Write access is not set
   DMAR: fault 7fa8a78000 (level=1) PTE = 149efc003

So not really sure what's going on and this might just be full on duct
tape, but it seems to work here. The machine has now survived a whole day
running that test whereas with superpage enabled it fails in less than
a minute usually.

TODO: might be nice to disable superpage only for the igfx iommu
instead of both iommus

If all these quirks are about igfx dedicated iommu's, I would suggest to
disable superpage only for the igfx ones.

Sure. Unfortunately there's no convenient mechanism to do that in
the iommu driver that I can immediately see. So not something I
can just whip up easily. Since you're actually familiar with the
driver maybe you can come up with a decent solution for that?



How about something like below? [no compile, no test...]

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 1131b8efb050..2d51ef288a9e 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -338,6 +338,7 @@ static int intel_iommu_strict;
 static int intel_iommu_superpage = 1;
 static int iommu_identity_mapping;
 static int iommu_skip_te_disable;
+static int iommu_skip_igfx_superpage;

 #define IDENTMAP_GFX   2
 #define IDENTMAP_AZALIA4
@@ -652,6 +653,27 @@ static bool domain_update_iommu_snooping(struct intel_iommu *skip)

return ret;
 }

+static bool domain_use_super_page(struct dmar_domain *domain)
+{
+   struct dmar_drhd_unit *drhd;
+   struct intel_iommu *iommu;
+   bool ret = true;
+
+   if (!intel_iommu_superpage)
+   return false;
+
+   rcu_read_lock();
+   for_each_active_iommu(iommu, drhd) {
+   if (drhd->gfx_dedicated && iommu_skip_igfx_superpage) {
+   ret = false;
+   break
+   }
+   }
+   rcu_read_unlock();
+
+   return ret;
+}
+
 static int domain_update_iommu_superpage(struct dmar_domain *domain,
 struct intel_iommu *skip)
 {
@@ -659,7 +681,7 @@ static int domain_update_iommu_superpage(struct dmar_domain *domain,

struct intel_iommu *iommu;
int mask = 0x3;

-   if (!intel_iommu_superpage)
+   if (!domain_use_super_page(domain))
return 0;

/* set iommu_superpage to the smallest common denominator */
@@ -5656,6 +5678,14 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx);

 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163A, quirk_iommu_igfx);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx);

+static void quirk_skip_igfx_superpage(struct pci_dev *dev)
+{
+	pci_info(dev, "Disabling IOMMU superpage for graphics on this chipset\n");
+   iommu_skip_igfx_superpage = 1;
+}
+
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x3184, quirk_skip_igfx_superpage);

+
 static void quirk_iommu_rwbf(struct pci_dev *dev)
 {
if (risky_device(dev))

Best regards,
baolu
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 1/4] iommu/vt-d: Disable superpage for Geminilake igfx

2021-07-11 Thread Lu Baolu

On 7/10/21 12:47 AM, Ville Syrjala wrote:

From: Ville Syrjälä 

While running "gem_exec_big --r single" from igt-gpu-tools on
Geminilake as soon as a 2M mapping is made I tend to get a DMAR
write fault. Strangely the faulting address is always a 4K page
and usually very far away from the 2M page that got mapped.
But if no 2M mappings get used I can't reproduce the fault.

I also tried to dump the PTE for the faulting address but it actually
looks correct to me (ie. definitely seems to have the write bit set):
  DMAR: DRHD: handling fault status reg 2
  DMAR: [DMA Write] Request device [00:02.0] PASID  fault addr 
7fa8a78000 [fault reason 05] PTE Write access is not set
  DMAR: fault 7fa8a78000 (level=1) PTE = 149efc003

So not really sure what's going on and this might just be full on duct
tape, but it seems to work here. The machine has now survived a whole day
running that test whereas with superpage enabled it fails in less than
a minute usually.

TODO: might be nice to disable superpage only for the igfx iommu
   instead of both iommus


If all these quirks are about igfx dedicated iommu's, I would suggest to
disable superpage only for the igfx ones.

Best regards,
baolu


TODO: would be nice to use the macros from include/drm/i915_pciids.h,
   but can't do that with DECLARE_PCI_FIXUP_HEADER()

Cc: David Woodhouse 
Cc: Lu Baolu 
Cc: io...@lists.linux-foundation.org
Signed-off-by: Ville Syrjälä 
---
  drivers/iommu/intel/iommu.c | 10 ++
  1 file changed, 10 insertions(+)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 19c7888cbb86..4fff2c9c86af 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -5617,6 +5617,16 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx);
  DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163A, quirk_iommu_igfx);
  DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx);
  
+static void quirk_iommu_nosp(struct pci_dev *dev)

+{
+	pci_info(dev, "Disabling IOMMU superpage for graphics on this chipset\n");
+   intel_iommu_superpage = 0;
+}
+
+/* Geminilake igfx appears to have issues with superpage */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x3184, quirk_iommu_nosp);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x3185, quirk_iommu_nosp);
+
  static void quirk_iommu_rwbf(struct pci_dev *dev)
  {
if (risky_device(dev))


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v4 0/7] Convert the intel iommu driver to the dma-iommu api

2020-11-20 Thread Lu Baolu

On 2020/11/3 18:54, Joerg Roedel wrote:

Hi,

On Tue, Nov 03, 2020 at 11:58:26AM +0200, Joonas Lahtinen wrote:

Would that work for you? We intend to send the feature pull requests
to DRM for 5.11 in the upcoming weeks.


For the IOMMU side it is best to include the workaround for now. When
the DRM fixes are merged into v5.11-rc1 together with this conversion,
it can be reverted and will not be in 5.11-final.


Okay! So I will keep the workaround and send a new version (mostly
rebase) to Will.

Best regards,
baolu

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v4 0/7] Convert the intel iommu driver to the dma-iommu api

2020-11-02 Thread Lu Baolu

On 11/2/20 7:52 PM, Tvrtko Ursulin wrote:


On 02/11/2020 02:00, Lu Baolu wrote:

Hi Tvrtko,
On 10/12/20 4:44 PM, Tvrtko Ursulin wrote:


On 29/09/2020 01:11, Lu Baolu wrote:

Hi Tvrtko,

On 9/28/20 5:44 PM, Tvrtko Ursulin wrote:


On 27/09/2020 07:34, Lu Baolu wrote:

Hi,

The previous post of this series could be found here.

https://lore.kernel.org/linux-iommu/20200912032200.11489-1-baolu...@linux.intel.com/ 



This version introduce a new patch [4/7] to fix an issue reported 
here.


https://lore.kernel.org/linux-iommu/51a1baec-48d1-c0ac-181b-1fba92aa4...@linux.intel.com/ 



There aren't any other changes.

Please help to test and review.

Best regards,
baolu

Lu Baolu (3):
   iommu: Add quirk for Intel graphic devices in map_sg


Since I do have patches to fix i915 to handle this, do we want to 
co-ordinate the two and avoid having to add this quirk and then 
later remove it? Or you want to go the staged approach?


I have no preference. It depends on which patch goes first. Let the
maintainers help here.


FYI we have merged the required i915 patches to our tree last week or 
so. I *think* this means they will go into 5.11. So the i915 specific 
workaround patch will not be needed in Intel IOMMU.


Do you mind telling me what's the status of this fix patch? I tried this
series on v5.10-rc1 with the graphic quirk patch dropped. I am still
seeing dma faults from graphic device.


Hmm back then I thought i915 fixes for this would land in 5.11 so I will 
stick with that. :) (See my quoted text a paragraph above yours.)


What size are those fixes? I am considering pushing this series for
v5.11. Is it possible to get some acks for those patches and let them
go to Linus through iommu tree?

Best regards,
baolu
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v4 0/7] Convert the intel iommu driver to the dma-iommu api

2020-11-01 Thread Lu Baolu

Hi Tvrtko,

On 10/12/20 4:44 PM, Tvrtko Ursulin wrote:


On 29/09/2020 01:11, Lu Baolu wrote:

Hi Tvrtko,

On 9/28/20 5:44 PM, Tvrtko Ursulin wrote:


On 27/09/2020 07:34, Lu Baolu wrote:

Hi,

The previous post of this series could be found here.

https://lore.kernel.org/linux-iommu/20200912032200.11489-1-baolu...@linux.intel.com/ 



This version introduce a new patch [4/7] to fix an issue reported here.

https://lore.kernel.org/linux-iommu/51a1baec-48d1-c0ac-181b-1fba92aa4...@linux.intel.com/ 



There aren't any other changes.

Please help to test and review.

Best regards,
baolu

Lu Baolu (3):
   iommu: Add quirk for Intel graphic devices in map_sg


Since I do have patches to fix i915 to handle this, do we want to 
co-ordinate the two and avoid having to add this quirk and then later 
remove it? Or you want to go the staged approach?


I have no preference. It depends on which patch goes first. Let the
maintainers help here.


FYI we have merged the required i915 patches to our tree last week or 
so. I *think* this means they will go into 5.11. So the i915 specific 
workaround patch will not be needed in Intel IOMMU.


Do you mind telling me what's the status of this fix patch? I tried this
series on v5.10-rc1 with the graphic quirk patch dropped. I am still
seeing dma faults from graphic device.

Best regards,
baolu



Regards,

Tvrtko

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v4 0/7] Convert the intel iommu driver to the dma-iommu api

2020-10-12 Thread Lu Baolu

Hi Tvrtko,

On 10/12/20 4:44 PM, Tvrtko Ursulin wrote:


On 29/09/2020 01:11, Lu Baolu wrote:

Hi Tvrtko,

On 9/28/20 5:44 PM, Tvrtko Ursulin wrote:


On 27/09/2020 07:34, Lu Baolu wrote:

Hi,

The previous post of this series could be found here.

https://lore.kernel.org/linux-iommu/20200912032200.11489-1-baolu...@linux.intel.com/ 



This version introduce a new patch [4/7] to fix an issue reported here.

https://lore.kernel.org/linux-iommu/51a1baec-48d1-c0ac-181b-1fba92aa4...@linux.intel.com/ 



There aren't any other changes.

Please help to test and review.

Best regards,
baolu

Lu Baolu (3):
   iommu: Add quirk for Intel graphic devices in map_sg


Since I do have patches to fix i915 to handle this, do we want to 
co-ordinate the two and avoid having to add this quirk and then later 
remove it? Or you want to go the staged approach?


I have no preference. It depends on which patch goes first. Let the
maintainers help here.


FYI we have merged the required i915 patches to our tree last week or 
so. I *think* this means they will go into 5.11. So the i915 specific 
workaround patch will not be needed in Intel IOMMU.


Thanks for letting us know this. I will drop the workaround patch and
test the whole series after the next rc1.

Best regards,
baolu
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v4 0/7] Convert the intel iommu driver to the dma-iommu api

2020-10-02 Thread Lu Baolu

Hi Joerg,

On 2020/10/1 20:17, Joerg Roedel wrote:

Hi Baolu,

On Tue, Sep 29, 2020 at 08:11:35AM +0800, Lu Baolu wrote:

I have no preference. It depends on which patch goes first. Let the
maintainers help here.


No preference on my side, except that it is too late for this now to
make it into v5.10. Besides that I let the decission up to you when this
is ready. Just send me a pull-request when it should get into the
iommu-tree.


Sure.

Best regards,
baolu



Regards,

Joerg


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v4 0/7] Convert the intel iommu driver to the dma-iommu api

2020-09-28 Thread Lu Baolu

Hi Tvrtko,

On 9/28/20 5:44 PM, Tvrtko Ursulin wrote:


On 27/09/2020 07:34, Lu Baolu wrote:

Hi,

The previous post of this series could be found here.

https://lore.kernel.org/linux-iommu/20200912032200.11489-1-baolu...@linux.intel.com/ 



This version introduce a new patch [4/7] to fix an issue reported here.

https://lore.kernel.org/linux-iommu/51a1baec-48d1-c0ac-181b-1fba92aa4...@linux.intel.com/ 



There aren't any other changes.

Please help to test and review.

Best regards,
baolu

Lu Baolu (3):
   iommu: Add quirk for Intel graphic devices in map_sg


Since I do have patches to fix i915 to handle this, do we want to 
co-ordinate the two and avoid having to add this quirk and then later 
remove it? Or you want to go the staged approach?


I have no preference. It depends on which patch goes first. Let the
maintainers help here.

Best regards,
baolu



Regards,

Tvrtko


   iommu/vt-d: Update domain geometry in iommu_ops.at(de)tach_dev
   iommu/vt-d: Cleanup after converting to dma-iommu ops

Tom Murphy (4):
   iommu: Handle freelists when using deferred flushing in iommu drivers
   iommu: Add iommu_dma_free_cpu_cached_iovas()
   iommu: Allow the dma-iommu api to use bounce buffers
   iommu/vt-d: Convert intel iommu driver to the iommu ops

  .../admin-guide/kernel-parameters.txt |   5 -
  drivers/iommu/dma-iommu.c | 228 -
  drivers/iommu/intel/Kconfig   |   1 +
  drivers/iommu/intel/iommu.c   | 901 +++---
  include/linux/dma-iommu.h |   8 +
  include/linux/iommu.h |   1 +
  6 files changed, 336 insertions(+), 808 deletions(-)


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v4 7/7] iommu/vt-d: Cleanup after converting to dma-iommu ops

2020-09-27 Thread Lu Baolu
Some cleanups after converting the driver to use dma-iommu ops.
- Remove nobounce option;
- Cleanup and simplify the path in domain mapping.

Signed-off-by: Lu Baolu 
---
 .../admin-guide/kernel-parameters.txt |  5 --
 drivers/iommu/intel/iommu.c   | 90 ++-
 2 files changed, 28 insertions(+), 67 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt 
b/Documentation/admin-guide/kernel-parameters.txt
index a1068742a6df..0d11ef43d314 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1866,11 +1866,6 @@
Note that using this option lowers the security
provided by tboot because it makes the system
vulnerable to DMA attacks.
-   nobounce [Default off]
-   Disable bounce buffer for untrusted devices such as
-   the Thunderbolt devices. This will treat the untrusted
-   devices as the trusted ones, hence might expose security
-   risks of DMA attacks.
 
intel_idle.max_cstate=  [KNL,HW,ACPI,X86]
0   disables intel_idle and fall back on acpi_idle.
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 69ccf92ab37b..5135d9ba0993 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -355,7 +355,6 @@ static int dmar_forcedac;
 static int intel_iommu_strict;
 static int intel_iommu_superpage = 1;
 static int iommu_identity_mapping;
-static int intel_no_bounce;
 static int iommu_skip_te_disable;
 
 #define IDENTMAP_GFX   2
@@ -457,9 +456,6 @@ static int __init intel_iommu_setup(char *str)
} else if (!strncmp(str, "tboot_noforce", 13)) {
pr_info("Intel-IOMMU: not forcing on after tboot. This 
could expose security risk for tboot\n");
intel_iommu_tboot_noforce = 1;
-   } else if (!strncmp(str, "nobounce", 8)) {
-   pr_info("Intel-IOMMU: No bounce buffer. This could 
expose security risks of DMA attacks\n");
-   intel_no_bounce = 1;
}
 
str += strcspn(str, ",");
@@ -2277,15 +2273,14 @@ static inline int hardware_largepage_caps(struct 
dmar_domain *domain,
return level;
 }
 
-static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
-   struct scatterlist *sg, unsigned long phys_pfn,
-   unsigned long nr_pages, int prot)
+static int
+__domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
+unsigned long phys_pfn, unsigned long nr_pages, int prot)
 {
struct dma_pte *first_pte = NULL, *pte = NULL;
-   phys_addr_t pteval;
-   unsigned long sg_res = 0;
unsigned int largepage_lvl = 0;
unsigned long lvl_pages = 0;
+   phys_addr_t pteval;
u64 attr;
 
BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
@@ -2297,26 +2292,14 @@ static int __domain_mapping(struct dmar_domain *domain, 
unsigned long iov_pfn,
if (domain_use_first_level(domain))
attr |= DMA_FL_PTE_PRESENT | DMA_FL_PTE_XD | DMA_FL_PTE_US;
 
-   if (!sg) {
-   sg_res = nr_pages;
-   pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | attr;
-   }
+   pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | attr;
 
while (nr_pages > 0) {
uint64_t tmp;
 
-   if (!sg_res) {
-   unsigned int pgoff = sg->offset & ~PAGE_MASK;
-
-   sg_res = aligned_nrpages(sg->offset, sg->length);
-   sg->dma_address = ((dma_addr_t)iov_pfn << 
VTD_PAGE_SHIFT) + pgoff;
-   sg->dma_length = sg->length;
-   pteval = (sg_phys(sg) - pgoff) | attr;
-   phys_pfn = pteval >> VTD_PAGE_SHIFT;
-   }
-
if (!pte) {
-   largepage_lvl = hardware_largepage_caps(domain, 
iov_pfn, phys_pfn, sg_res);
+   largepage_lvl = hardware_largepage_caps(domain, iov_pfn,
+   phys_pfn, nr_pages);
 
first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, 
_lvl);
if (!pte)
@@ -2328,7 +2311,7 @@ static int __domain_mapping(struct dmar_domain *domain, 
unsigned long iov_pfn,
pteval |= DMA_PTE_LARGE_PAGE;
lvl_pages = lvl_to_nr_pages(largepage_lvl);
 
-   nr_superpages = sg_res / lvl_pages;
+   nr_superpages = nr_pages / lvl_pages;
end_pfn = 

[Intel-gfx] [PATCH v4 1/7] iommu: Handle freelists when using deferred flushing in iommu drivers

2020-09-27 Thread Lu Baolu
From: Tom Murphy 

Allow the iommu_unmap_fast to return newly freed page table pages and
pass the freelist to queue_iova in the dma-iommu ops path.

This is useful for iommu drivers (in this case the intel iommu driver)
which need to wait for the ioTLB to be flushed before newly
free/unmapped page table pages can be freed. This way we can still batch
ioTLB free operations and handle the freelists.

Signed-off-by: Tom Murphy 
Signed-off-by: Lu Baolu 
---
 drivers/iommu/dma-iommu.c   | 29 +--
 drivers/iommu/intel/iommu.c | 55 -
 include/linux/iommu.h   |  1 +
 3 files changed, 58 insertions(+), 27 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index cd6e3c70ebb3..1b8ef3a2cbc3 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -50,6 +50,18 @@ struct iommu_dma_cookie {
struct iommu_domain *fq_domain;
 };
 
+static void iommu_dma_entry_dtor(unsigned long data)
+{
+   struct page *freelist = (struct page *)data;
+
+   while (freelist) {
+   unsigned long p = (unsigned long)page_address(freelist);
+
+   freelist = freelist->freelist;
+   free_page(p);
+   }
+}
+
 static inline size_t cookie_msi_granule(struct iommu_dma_cookie *cookie)
 {
if (cookie->type == IOMMU_DMA_IOVA_COOKIE)
@@ -344,7 +356,7 @@ static int iommu_dma_init_domain(struct iommu_domain 
*domain, dma_addr_t base,
if (!cookie->fq_domain && !iommu_domain_get_attr(domain,
DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, ) && attr) {
if (init_iova_flush_queue(iovad, iommu_dma_flush_iotlb_all,
-   NULL))
+ iommu_dma_entry_dtor))
pr_warn("iova flush queue initialization failed\n");
else
cookie->fq_domain = domain;
@@ -441,7 +453,7 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain 
*domain,
 }
 
 static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
-   dma_addr_t iova, size_t size)
+   dma_addr_t iova, size_t size, struct page *freelist)
 {
struct iova_domain *iovad = >iovad;
 
@@ -450,7 +462,8 @@ static void iommu_dma_free_iova(struct iommu_dma_cookie 
*cookie,
cookie->msi_iova -= size;
else if (cookie->fq_domain) /* non-strict mode */
queue_iova(iovad, iova_pfn(iovad, iova),
-   size >> iova_shift(iovad), 0);
+   size >> iova_shift(iovad),
+   (unsigned long)freelist);
else
free_iova_fast(iovad, iova_pfn(iovad, iova),
size >> iova_shift(iovad));
@@ -475,7 +488,7 @@ static void __iommu_dma_unmap(struct device *dev, 
dma_addr_t dma_addr,
 
if (!cookie->fq_domain)
iommu_iotlb_sync(domain, _gather);
-   iommu_dma_free_iova(cookie, dma_addr, size);
+   iommu_dma_free_iova(cookie, dma_addr, size, iotlb_gather.freelist);
 }
 
 static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
@@ -497,7 +510,7 @@ static dma_addr_t __iommu_dma_map(struct device *dev, 
phys_addr_t phys,
return DMA_MAPPING_ERROR;
 
if (iommu_map_atomic(domain, iova, phys - iova_off, size, prot)) {
-   iommu_dma_free_iova(cookie, iova, size);
+   iommu_dma_free_iova(cookie, iova, size, NULL);
return DMA_MAPPING_ERROR;
}
return iova + iova_off;
@@ -649,7 +662,7 @@ static void *iommu_dma_alloc_remap(struct device *dev, 
size_t size,
 out_free_sg:
sg_free_table();
 out_free_iova:
-   iommu_dma_free_iova(cookie, iova, size);
+   iommu_dma_free_iova(cookie, iova, size, NULL);
 out_free_pages:
__iommu_dma_free_pages(pages, count);
return NULL;
@@ -900,7 +913,7 @@ static int iommu_dma_map_sg(struct device *dev, struct 
scatterlist *sg,
return __finalise_sg(dev, sg, nents, iova);
 
 out_free_iova:
-   iommu_dma_free_iova(cookie, iova, iova_len);
+   iommu_dma_free_iova(cookie, iova, iova_len, NULL);
 out_restore_sg:
__invalidate_sg(sg, nents);
return 0;
@@ -1194,7 +1207,7 @@ static struct iommu_dma_msi_page 
*iommu_dma_get_msi_page(struct device *dev,
return msi_page;
 
 out_free_iova:
-   iommu_dma_free_iova(cookie, iova, size);
+   iommu_dma_free_iova(cookie, iova, size, NULL);
 out_free_page:
kfree(msi_page);
return NULL;
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 722545f61ba2..fdd514c8b2d4 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -1243,17 +1243,17 @@ static struct page *dma_pte_clear_level(struct 
dmar_domain *domain, int level,
pages can only 

[Intel-gfx] [PATCH v4 6/7] iommu/vt-d: Convert intel iommu driver to the iommu ops

2020-09-27 Thread Lu Baolu
From: Tom Murphy 

Convert the intel iommu driver to the dma-iommu api. Remove the iova
handling and reserve region code from the intel iommu driver.

Signed-off-by: Tom Murphy 
Signed-off-by: Lu Baolu 
---
 drivers/iommu/intel/Kconfig |   1 +
 drivers/iommu/intel/iommu.c | 742 ++--
 2 files changed, 43 insertions(+), 700 deletions(-)

diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig
index 5337ee1584b0..28a3d1596c76 100644
--- a/drivers/iommu/intel/Kconfig
+++ b/drivers/iommu/intel/Kconfig
@@ -13,6 +13,7 @@ config INTEL_IOMMU
select DMAR_TABLE
select SWIOTLB
select IOASID
+   select IOMMU_DMA
help
  DMA remapping (DMAR) devices support enables independent address
  translations for Direct Memory Access (DMA) from devices.
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 7d3c73d1e498..69ccf92ab37b 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -31,6 +31,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -41,7 +42,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -382,9 +382,6 @@ struct device_domain_info *get_domain_info(struct device 
*dev)
 DEFINE_SPINLOCK(device_domain_lock);
 static LIST_HEAD(device_domain_list);
 
-#define device_needs_bounce(d) (!intel_no_bounce && dev_is_pci(d) &&   \
-   to_pci_dev(d)->untrusted)
-
 /*
  * Iterate over elements in device_domain_list and call the specified
  * callback @fn against each element.
@@ -1289,13 +1286,6 @@ static void dma_free_pagelist(struct page *freelist)
}
 }
 
-static void iova_entry_free(unsigned long data)
-{
-   struct page *freelist = (struct page *)data;
-
-   dma_free_pagelist(freelist);
-}
-
 /* iommu handling */
 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
 {
@@ -1660,19 +1650,17 @@ static inline void __mapping_notify_one(struct 
intel_iommu *iommu,
iommu_flush_write_buffer(iommu);
 }
 
-static void iommu_flush_iova(struct iova_domain *iovad)
+static void intel_flush_iotlb_all(struct iommu_domain *domain)
 {
-   struct dmar_domain *domain;
+   struct dmar_domain *dmar_domain = to_dmar_domain(domain);
int idx;
 
-   domain = container_of(iovad, struct dmar_domain, iovad);
-
-   for_each_domain_iommu(idx, domain) {
+   for_each_domain_iommu(idx, dmar_domain) {
struct intel_iommu *iommu = g_iommus[idx];
-   u16 did = domain->iommu_did[iommu->seq_id];
+   u16 did = dmar_domain->iommu_did[iommu->seq_id];
 
-   if (domain_use_first_level(domain))
-   domain_flush_piotlb(iommu, domain, 0, -1, 0);
+   if (domain_use_first_level(dmar_domain))
+   domain_flush_piotlb(iommu, dmar_domain, 0, -1, 0);
else
iommu->flush.flush_iotlb(iommu, did, 0, 0,
 DMA_TLB_DSI_FLUSH);
@@ -1954,48 +1942,6 @@ static int domain_detach_iommu(struct dmar_domain 
*domain,
return count;
 }
 
-static struct iova_domain reserved_iova_list;
-static struct lock_class_key reserved_rbtree_key;
-
-static int dmar_init_reserved_ranges(void)
-{
-   struct pci_dev *pdev = NULL;
-   struct iova *iova;
-   int i;
-
-   init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN);
-
-   lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
-   &reserved_rbtree_key);
-
-   /* IOAPIC ranges shouldn't be accessed by DMA */
-   iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
-   IOVA_PFN(IOAPIC_RANGE_END));
-   if (!iova) {
-   pr_err("Reserve IOAPIC range failed\n");
-   return -ENODEV;
-   }
-
-   /* Reserve all PCI MMIO to avoid peer-to-peer access */
-   for_each_pci_dev(pdev) {
-   struct resource *r;
-
-   for (i = 0; i < PCI_NUM_RESOURCES; i++) {
-   r = &pdev->resource[i];
-   if (!r->flags || !(r->flags & IORESOURCE_MEM))
-   continue;
-   iova = reserve_iova(&reserved_iova_list,
-   IOVA_PFN(r->start),
-   IOVA_PFN(r->end));
-   if (!iova) {
-   pci_err(pdev, "Reserve iova for %pR failed\n", 
r);
-   return -ENODEV;
-   }
-   }
-   }
-   return 0;
-}
-
 static inline int guestwidth_to_adjustwidth(int gaw)
 {
int agaw;
@@ -2018,7 +1964,7 @@ static void domain_exit(struct dmar_domain *domain)
 
/* destroy iovas */
if (domain->domain.type == IOMMU_DOMAIN_DMA)
- 

[Intel-gfx] [PATCH v4 4/7] iommu: Add quirk for Intel graphic devices in map_sg

2020-09-27 Thread Lu Baolu
Combining the sg segments exposes a bug in the Intel i915 driver which
causes visual artifacts and the screen to freeze. This is most likely
because of how the i915 handles the returned list. It probably doesn't
respect the returned value specifying the number of elements in the list
and instead depends on the previous behaviour of the Intel iommu driver
which would return the same number of elements in the output list as in
the input list.
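For context, the DMA API contract that i915 apparently ignores looks roughly
like this (a hypothetical caller sketch, not i915 code): the value returned by
dma_map_sg() is the number of DMA segments actually produced, which may be
smaller than the nents passed in, and only that many segments may be walked
with sg_dma_address()/sg_dma_len().

#include <linux/dma-mapping.h>
#include <linux/scatterlist.h>

/* Hypothetical caller honouring the count returned by dma_map_sg(). */
static int example_map_buffer(struct device *dev, struct scatterlist *sgl,
			      int nents)
{
	struct scatterlist *sg;
	int mapped, i;

	mapped = dma_map_sg(dev, sgl, nents, DMA_TO_DEVICE);
	if (!mapped)
		return -ENOMEM;

	/* Walk only the mapped segments; they may be fewer than nents. */
	for_each_sg(sgl, sg, mapped, i)
		pr_debug("seg %d: dma %pad len %u\n", i,
			 &sg_dma_address(sg), sg_dma_len(sg));

	/* Unmap with the original nents, as the DMA API requires. */
	dma_unmap_sg(dev, sgl, nents, DMA_TO_DEVICE);
	return 0;
}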

Signed-off-by: Tom Murphy 
Signed-off-by: Lu Baolu 
---
 drivers/iommu/dma-iommu.c | 27 +++
 1 file changed, 27 insertions(+)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 3526db774611..e7e4d758f51a 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -879,6 +879,33 @@ static int __finalise_sg(struct device *dev, struct 
scatterlist *sg, int nents,
unsigned int cur_len = 0, max_len = dma_get_max_seg_size(dev);
int i, count = 0;
 
+   /*
+* The Intel graphics driver assumes that the returned sg list is
+* not combined. This blocks the effort of converting the Intel
+* IOMMU driver to the dma-iommu APIs. Add this quirk to keep the
+* device driver working; it should be removed once the i915
+* driver is fixed.
+*/
+   if (IS_ENABLED(CONFIG_DRM_I915) && dev_is_pci(dev) &&
+   to_pci_dev(dev)->vendor == PCI_VENDOR_ID_INTEL &&
+   (to_pci_dev(dev)->class >> 16) == PCI_BASE_CLASS_DISPLAY) {
+   for_each_sg(sg, s, nents, i) {
+   unsigned int s_iova_off = sg_dma_address(s);
+   unsigned int s_length = sg_dma_len(s);
+   unsigned int s_iova_len = s->length;
+
+   s->offset += s_iova_off;
+   s->length = s_length;
+   sg_dma_address(s) = dma_addr + s_iova_off;
+   sg_dma_len(s) = s_length;
+   dma_addr += s_iova_len;
+
+   pr_info_once("sg combining disabled due to i915 
driver\n");
+   }
+
+   return nents;
+   }
+
for_each_sg(sg, s, nents, i) {
/* Restore this segment's original unaligned fields first */
unsigned int s_iova_off = sg_dma_address(s);
-- 
2.17.1



[Intel-gfx] [PATCH v4 2/7] iommu: Add iommu_dma_free_cpu_cached_iovas()

2020-09-27 Thread Lu Baolu
From: Tom Murphy 

Add an iommu_dma_free_cpu_cached_iovas() function to allow drivers that
use the dma-iommu ops to free their cached CPU IOVAs.
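As a usage illustration (hypothetical code, not part of this patch), a driver
that keeps a DMA-API domain around could drop that domain's per-CPU IOVA
caches when a CPU goes offline, typically from a CPU-hotplug callback:

#include <linux/dma-iommu.h>
#include <linux/iommu.h>

/*
 * Hypothetical CPU-hotplug "dead" handler: release the per-CPU IOVA
 * caches that the offlined CPU accumulated for a DMA-API domain.
 */
static void example_cpu_dead(unsigned int cpu, struct iommu_domain *domain)
{
	if (domain && domain->type == IOMMU_DOMAIN_DMA)
		iommu_dma_free_cpu_cached_iovas(cpu, domain);
}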

Signed-off-by: Tom Murphy 
Signed-off-by: Lu Baolu 
---
 drivers/iommu/dma-iommu.c | 9 +
 include/linux/dma-iommu.h | 8 
 2 files changed, 17 insertions(+)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 1b8ef3a2cbc3..fb84cfa83703 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -50,6 +50,15 @@ struct iommu_dma_cookie {
struct iommu_domain *fq_domain;
 };
 
+void iommu_dma_free_cpu_cached_iovas(unsigned int cpu,
+   struct iommu_domain *domain)
+{
+   struct iommu_dma_cookie *cookie = domain->iova_cookie;
+   struct iova_domain *iovad = &cookie->iovad;
+
+   free_cpu_cached_iovas(cpu, iovad);
+}
+
 static void iommu_dma_entry_dtor(unsigned long data)
 {
struct page *freelist = (struct page *)data;
diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h
index 2112f21f73d8..706b68d1359b 100644
--- a/include/linux/dma-iommu.h
+++ b/include/linux/dma-iommu.h
@@ -37,6 +37,9 @@ void iommu_dma_compose_msi_msg(struct msi_desc *desc,
 
 void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list);
 
+void iommu_dma_free_cpu_cached_iovas(unsigned int cpu,
+   struct iommu_domain *domain);
+
 #else /* CONFIG_IOMMU_DMA */
 
 struct iommu_domain;
@@ -78,5 +81,10 @@ static inline void iommu_dma_get_resv_regions(struct device 
*dev, struct list_he
 {
 }
 
+static inline void iommu_dma_free_cpu_cached_iovas(unsigned int cpu,
+   struct iommu_domain *domain)
+{
+}
+
 #endif /* CONFIG_IOMMU_DMA */
 #endif /* __DMA_IOMMU_H */
-- 
2.17.1



[Intel-gfx] [PATCH v4 5/7] iommu/vt-d: Update domain geometry in iommu_ops.at(de)tach_dev

2020-09-27 Thread Lu Baolu
The iommu-dma layer constrains IOVA allocation based on the domain geometry
that the driver reports. Update the domain geometry every time a domain is
attached to or detached from a device.
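For reference, a simplified sketch (paraphrasing the dma-iommu allocation
path, with names assumed from the surrounding code) of how the reported
geometry caps IOVA allocation; a stale aperture_end therefore allows IOVAs
above what the attached hardware can translate:

#include <linux/iommu.h>
#include <linux/iova.h>

/* Simplified sketch of the clamping done in iommu_dma_alloc_iova(). */
static dma_addr_t example_alloc_iova(struct iommu_domain *domain,
				     struct iova_domain *iovad,
				     size_t size, u64 dma_limit)
{
	unsigned long shift = iova_shift(iovad);
	unsigned long iova_len = iova_align(iovad, size) >> shift;
	unsigned long pfn;

	/* The driver-reported aperture bounds every allocation. */
	if (domain->geometry.force_aperture)
		dma_limit = min_t(u64, dma_limit,
				  domain->geometry.aperture_end);

	pfn = alloc_iova_fast(iovad, iova_len, dma_limit >> shift, true);
	return (dma_addr_t)pfn << shift;
}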

Signed-off-by: Lu Baolu 
---
 drivers/iommu/intel/iommu.c | 16 ++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index fdd514c8b2d4..7d3c73d1e498 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -67,8 +67,8 @@
 #define MAX_AGAW_WIDTH 64
 #define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
 
-#define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
-#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
+#define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << ((gaw) - VTD_PAGE_SHIFT)) - 1)
+#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << (gaw)) - 1)
 
 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
to match. That way, we can use 'unsigned long' for PFNs with impunity. */
@@ -739,6 +739,18 @@ static void domain_update_iommu_cap(struct dmar_domain 
*domain)
 */
if (domain->nid == NUMA_NO_NODE)
domain->nid = domain_update_device_node(domain);
+
+   /*
+* First-level translation restricts the input-address to a
+* canonical address (i.e., address bits 63:N have the same
+* value as address bit [N-1], where N is 48-bits with 4-level
+* paging and 57-bits with 5-level paging). Hence, skip bit
+* [N-1].
+*/
+   if (domain_use_first_level(domain))
+   domain->domain.geometry.aperture_end = 
__DOMAIN_MAX_ADDR(domain->gaw - 1);
+   else
+   domain->domain.geometry.aperture_end = 
__DOMAIN_MAX_ADDR(domain->gaw);
 }
 
 struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
-- 
2.17.1



[Intel-gfx] [PATCH v4 3/7] iommu: Allow the dma-iommu api to use bounce buffers

2020-09-27 Thread Lu Baolu
From: Tom Murphy 

Allow the dma-iommu api to use bounce buffers for untrusted devices.
This is a copy of the intel bounce buffer code.
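Condensed to its core, the copied logic bounces a mapping only when the device
is untrusted and the buffer is not aligned to the IOVA granule; a rough sketch
of that predicate (helper names as used in the diff below):

#include <linux/iova.h>
#include <linux/pci.h>

/* Rough sketch: does this mapping need a swiotlb bounce buffer? */
static bool example_needs_bounce(struct device *dev, struct iova_domain *iovad,
				 phys_addr_t phys, size_t size)
{
	return IS_ENABLED(CONFIG_SWIOTLB) &&
	       dev_is_pci(dev) && to_pci_dev(dev)->untrusted &&
	       iova_offset(iovad, phys | size);
}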

Signed-off-by: Tom Murphy 
Co-developed-by: Lu Baolu 
Signed-off-by: Lu Baolu 
---
 drivers/iommu/dma-iommu.c | 163 +++---
 1 file changed, 150 insertions(+), 13 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index fb84cfa83703..3526db774611 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -21,9 +21,11 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
+#include 
 
 struct iommu_dma_msi_page {
struct list_headlist;
@@ -500,6 +502,31 @@ static void __iommu_dma_unmap(struct device *dev, 
dma_addr_t dma_addr,
iommu_dma_free_iova(cookie, dma_addr, size, iotlb_gather.freelist);
 }
 
+static void __iommu_dma_unmap_swiotlb(struct device *dev, dma_addr_t dma_addr,
+   size_t size, enum dma_data_direction dir,
+   unsigned long attrs)
+{
+   struct iommu_domain *domain = iommu_get_dma_domain(dev);
+   struct iommu_dma_cookie *cookie = domain->iova_cookie;
+   struct iova_domain *iovad = &cookie->iovad;
+   phys_addr_t phys;
+
+   phys = iommu_iova_to_phys(domain, dma_addr);
+   if (WARN_ON(!phys))
+   return;
+
+   __iommu_dma_unmap(dev, dma_addr, size);
+
+   if (unlikely(is_swiotlb_buffer(phys)))
+   swiotlb_tbl_unmap_single(dev, phys, size,
+   iova_align(iovad, size), dir, attrs);
+}
+
+static bool dev_is_untrusted(struct device *dev)
+{
+   return dev_is_pci(dev) && to_pci_dev(dev)->untrusted;
+}
+
 static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
size_t size, int prot, u64 dma_mask)
 {
@@ -525,6 +552,55 @@ static dma_addr_t __iommu_dma_map(struct device *dev, 
phys_addr_t phys,
return iova + iova_off;
 }
 
+static dma_addr_t __iommu_dma_map_swiotlb(struct device *dev, phys_addr_t phys,
+   size_t org_size, dma_addr_t dma_mask, bool coherent,
+   enum dma_data_direction dir, unsigned long attrs)
+{
+   int prot = dma_info_to_prot(dir, coherent, attrs);
+   struct iommu_domain *domain = iommu_get_dma_domain(dev);
+   struct iommu_dma_cookie *cookie = domain->iova_cookie;
+   struct iova_domain *iovad = &cookie->iovad;
+   size_t aligned_size = org_size;
+   void *padding_start;
+   size_t padding_size;
+   dma_addr_t iova;
+
+   /*
+* If both the physical buffer start address and size are
+* page aligned, we don't need to use a bounce page.
+*/
+   if (IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev) &&
+   iova_offset(iovad, phys | org_size)) {
+   aligned_size = iova_align(iovad, org_size);
+   phys = swiotlb_tbl_map_single(dev,
+   __phys_to_dma(dev, io_tlb_start),
+   phys, org_size, aligned_size, dir, attrs);
+
+   if (phys == DMA_MAPPING_ERROR)
+   return DMA_MAPPING_ERROR;
+
+   /* Cleanup the padding area. */
+   padding_start = phys_to_virt(phys);
+   padding_size = aligned_size;
+
+   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
+   (dir == DMA_TO_DEVICE ||
+dir == DMA_BIDIRECTIONAL)) {
+   padding_start += org_size;
+   padding_size -= org_size;
+   }
+
+   memset(padding_start, 0, padding_size);
+   }
+
+   iova = __iommu_dma_map(dev, phys, aligned_size, prot, dma_mask);
+   if ((iova == DMA_MAPPING_ERROR) && is_swiotlb_buffer(phys))
+   swiotlb_tbl_unmap_single(dev, phys, org_size,
+   aligned_size, dir, attrs);
+
+   return iova;
+}
+
 static void __iommu_dma_free_pages(struct page **pages, int count)
 {
while (count--)
@@ -697,11 +773,15 @@ static void iommu_dma_sync_single_for_cpu(struct device 
*dev,
 {
phys_addr_t phys;
 
-   if (dev_is_dma_coherent(dev))
+   if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
return;
 
phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
-   arch_sync_dma_for_cpu(phys, size, dir);
+   if (!dev_is_dma_coherent(dev))
+   arch_sync_dma_for_cpu(phys, size, dir);
+
+   if (is_swiotlb_buffer(phys))
+   swiotlb_tbl_sync_single(dev, phys, size, dir, SYNC_FOR_CPU);
 }
 
 static void iommu_dma_sync_single_for_device(struct device *dev,
@@ -709,11 +789,15 @@ static void iommu_dma_sync_single_for_device(struct 
device *dev,
 {
phys_addr_t phys;
 
-   if (dev_is_dma_coherent(dev))
+   if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
return

[Intel-gfx] [PATCH v4 0/7] Convert the intel iommu driver to the dma-iommu api

2020-09-27 Thread Lu Baolu
Hi,

The previous post of this series can be found here.

https://lore.kernel.org/linux-iommu/20200912032200.11489-1-baolu...@linux.intel.com/

This version introduces a new patch [4/7] to fix an issue reported here.

https://lore.kernel.org/linux-iommu/51a1baec-48d1-c0ac-181b-1fba92aa4...@linux.intel.com/

There aren't any other changes.

Please help to test and review.

Best regards,
baolu

Lu Baolu (3):
  iommu: Add quirk for Intel graphic devices in map_sg
  iommu/vt-d: Update domain geometry in iommu_ops.at(de)tach_dev
  iommu/vt-d: Cleanup after converting to dma-iommu ops

Tom Murphy (4):
  iommu: Handle freelists when using deferred flushing in iommu drivers
  iommu: Add iommu_dma_free_cpu_cached_iovas()
  iommu: Allow the dma-iommu api to use bounce buffers
  iommu/vt-d: Convert intel iommu driver to the iommu ops

 .../admin-guide/kernel-parameters.txt |   5 -
 drivers/iommu/dma-iommu.c | 228 -
 drivers/iommu/intel/Kconfig   |   1 +
 drivers/iommu/intel/iommu.c   | 901 +++---
 include/linux/dma-iommu.h |   8 +
 include/linux/iommu.h |   1 +
 6 files changed, 336 insertions(+), 808 deletions(-)

-- 
2.17.1



Re: [Intel-gfx] [PATCH v3 0/6] Convert the intel iommu driver to the dma-iommu api

2020-09-23 Thread Lu Baolu

Hi Tvrtko,

On 9/15/20 4:31 PM, Tvrtko Ursulin wrote:
With the previous version of the series I hit a problem on Ivybridge 
where apparently the dma engine width is not respected. At least that 
is my layman interpretation of the errors. From the older thread:


<3> [209.526605] DMAR: intel_iommu_map: iommu width (39) is not 
sufficient for the mapped address (008000)


Relevant iommu boot related messages are:

<6>[    0.184234] DMAR: Host address width 36
<6>[    0.184245] DMAR: DRHD base: 0x00fed9 flags: 0x0
<6>[    0.184288] DMAR: dmar0: reg_base_addr fed9 ver 1:0 cap 
c020e60262 ecap f0101a

<6>[    0.184308] DMAR: DRHD base: 0x00fed91000 flags: 0x1
<6>[    0.184337] DMAR: dmar1: reg_base_addr fed91000 ver 1:0 cap 
c9008020660262 ecap f0105a
<6>[    0.184357] DMAR: RMRR base: 0x00d8d28000 end: 
0x00d8d46fff
<6>[    0.184377] DMAR: RMRR base: 0x00db00 end: 
0x00df1f
<6>[    0.184398] DMAR-IR: IOAPIC id 2 under DRHD base  0xfed91000 
IOMMU 1

<6>[    0.184414] DMAR-IR: HPET id 0 under DRHD base 0xfed91000
<6>[    0.184428] DMAR-IR: Queued invalidation will be enabled to 
support x2apic and Intr-remapping.

<6>[    0.185173] DMAR-IR: Enabled IRQ remapping in x2apic mode

<6>[    0.878934] DMAR: No ATSR found
<6>[    0.878966] DMAR: dmar0: Using Queued invalidation
<6>[    0.879007] DMAR: dmar1: Using Queued invalidation

<6>[    0.915032] DMAR: Intel(R) Virtualization Technology for 
Directed I/O
<6>[    0.915060] PCI-DMA: Using software bounce buffering for IO 
(SWIOTLB)
<6>[    0.915084] software IO TLB: mapped [mem 0xc80d4000-0xcc0d4000] 
(64MB)


(Full boot log at 
https://intel-gfx-ci.01.org/tree/drm-tip/Trybot_7054/fi-ivb-3770/boot0.txt, 
failures at 
https://intel-gfx-ci.01.org/tree/drm-tip/Trybot_7054/fi-ivb-3770/igt@i915_selftest@l...@blt.html.) 



Does this look familiar or at least plausible to you? Is this 
something your new series has fixed?


This happens during attaching a domain to device. It has nothing to do
with this patch series. I will look into this issue, but not in this
email thread context.


I am not sure at what step the domain is attached to the device, but these
types of messages:


<3> [209.526605] DMAR: intel_iommu_map: iommu width (39) is not
 >> sufficient for the mapped address (008000)

They definitely appear to happen at runtime, as i915 is getting 
exercised by userspace.


Can you please check whether below change helps here?

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index c8323a9f8bde..0484c539debc 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -724,6 +724,7 @@ static int domain_update_device_node(struct 
dmar_domain *domain)

 /* Some capabilities may be different across iommus */
 static void domain_update_iommu_cap(struct dmar_domain *domain)
 {
+   domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw);
domain_update_iommu_coherency(domain);
domain->iommu_snooping = domain_update_iommu_snooping(NULL);
domain->iommu_superpage = domain_update_iommu_superpage(domain, 
NULL);


Best regards,
baolu


Re: [Intel-gfx] [PATCH v3 0/6] Convert the intel iommu driver to the dma-iommu api

2020-09-22 Thread Lu Baolu

On 9/22/20 7:05 PM, Robin Murphy wrote:
With the previous version of the series I hit a problem on Ivybridge 
where apparently the dma engine width is not respected. At least 
that is my layman interpretation of the errors. From the older thread:


<3> [209.526605] DMAR: intel_iommu_map: iommu width (39) is not 
sufficient for the mapped address (008000)


Relevant iommu boot related messages are:

<6>[    0.184234] DMAR: Host address width 36
<6>[    0.184245] DMAR: DRHD base: 0x00fed9 flags: 0x0
<6>[    0.184288] DMAR: dmar0: reg_base_addr fed9 ver 1:0 cap 
c020e60262 ecap f0101a

<6>[    0.184308] DMAR: DRHD base: 0x00fed91000 flags: 0x1
<6>[    0.184337] DMAR: dmar1: reg_base_addr fed91000 ver 1:0 cap 
c9008020660262 ecap f0105a
<6>[    0.184357] DMAR: RMRR base: 0x00d8d28000 end: 
0x00d8d46fff
<6>[    0.184377] DMAR: RMRR base: 0x00db00 end: 
0x00df1f
<6>[    0.184398] DMAR-IR: IOAPIC id 2 under DRHD base  0xfed91000 
IOMMU 1

<6>[    0.184414] DMAR-IR: HPET id 0 under DRHD base 0xfed91000
<6>[    0.184428] DMAR-IR: Queued invalidation will be enabled to 
support x2apic and Intr-remapping.

<6>[    0.185173] DMAR-IR: Enabled IRQ remapping in x2apic mode

<6>[    0.878934] DMAR: No ATSR found
<6>[    0.878966] DMAR: dmar0: Using Queued invalidation
<6>[    0.879007] DMAR: dmar1: Using Queued invalidation

<6>[    0.915032] DMAR: Intel(R) Virtualization Technology for 
Directed I/O
<6>[    0.915060] PCI-DMA: Using software bounce buffering for IO 
(SWIOTLB)
<6>[    0.915084] software IO TLB: mapped [mem 
0xc80d4000-0xcc0d4000] (64MB)


(Full boot log at 
https://intel-gfx-ci.01.org/tree/drm-tip/Trybot_7054/fi-ivb-3770/boot0.txt, 
failures at 
https://intel-gfx-ci.01.org/tree/drm-tip/Trybot_7054/fi-ivb-3770/igt@i915_selftest@l...@blt.html.) 



Does this look familiar or at least plausible to you? Is this 
something your new series has fixed?


This happens during attaching a domain to device. It has nothing to do
with this patch series. I will look into this issue, but not in this
email thread context.


I am not sure at what step the domain is attached to the device, but these
types of messages:


<3> [209.526605] DMAR: intel_iommu_map: iommu width (39) is not
 >> sufficient for the mapped address (008000)

They definitely appear to happen at runtime, as i915 is getting 
exercised by userspace.


AFAICS this certainly might be related to this series - iommu-dma will 


Oh! I looked at the wrong function. prepare_domain_attach_device()
prints a similar message which made me believe that it was not caused
by this patch series.

constrain IOVA allocation based on the domain geometry that the driver 
reports, which in this case is set only once when first allocating the 
domain. Thus it looks like both the dmar_domain->gaw adjustment in 
prepare_domain_attach_device() and the domain_use_first_level() business 
in intel_alloc_iova() effectively get lost in this conversion, since the 
domain geometry never gets updated to reflect those additional constraints.


Sounds reasonable. I will look into the code and work out a fix.


> Robin.



Best regards,
baolu


Re: [Intel-gfx] [PATCH v3 0/6] Convert the intel iommu driver to the dma-iommu api

2020-09-21 Thread Lu Baolu

Hi Logan,

On 9/21/20 11:48 PM, Logan Gunthorpe wrote:



On 2020-09-20 12:36 a.m., Lu Baolu wrote:

Hi Logan,

On 2020/9/19 4:47, Logan Gunthorpe wrote:

Hi Lu,

On 2020-09-11 9:21 p.m., Lu Baolu wrote:

Tom Murphy has almost done all the work. His latest patch series was
posted here.

https://lore.kernel.org/linux-iommu/20200903201839.7327-1-murph...@tcd.ie/

Thanks a lot!

This series is a follow-up with below changes:

1. Add a quirk for the i915 driver issue described in Tom's cover
letter.
2. Fix several bugs in patch "iommu: Allow the dma-iommu api to use
bounce buffers" to make the bounce buffer work for untrusted devices.
3. Several cleanups in iommu/vt-d driver after the conversion.



I'm trying to test this on an old Sandy Bridge, but found that I get
spammed with warnings on boot. I've put a sample of a few of them below.
They all seem to be related to ioat.

I had the same issue with Tom's v2 but never saw this on his v1.


Have you verified whether this could be reproduced with the latest
upstream kernel (without this patch series)?


Yes.


I am sorry. Just want to make sure I understand you correctly. :-) When
you say "yes", do you mean you could reproduce this with pure upstream
kernel (5.9-rc6)?


Also, it's hitting a warning in the dma-iommu code which would not
be hit without this patch set.


Without this series, DMA APIs don't go through dma-iommu. Do you mind
posting the warning message?

Best regards,
baolu


Re: [Intel-gfx] [PATCH v3 0/6] Convert the intel iommu driver to the dma-iommu api

2020-09-20 Thread Lu Baolu

Hi Logan,

On 2020/9/19 4:47, Logan Gunthorpe wrote:

Hi Lu,

On 2020-09-11 9:21 p.m., Lu Baolu wrote:

Tom Murphy has almost done all the work. His latest patch series was
posted here.

https://lore.kernel.org/linux-iommu/20200903201839.7327-1-murph...@tcd.ie/

Thanks a lot!

This series is a follow-up with below changes:

1. Add a quirk for the i915 driver issue described in Tom's cover
letter.
2. Fix several bugs in patch "iommu: Allow the dma-iommu api to use
bounce buffers" to make the bounce buffer work for untrusted devices.
3. Several cleanups in iommu/vt-d driver after the conversion.



I'm trying to test this on an old Sandy Bridge, but found that I get
spammed with warnings on boot. I've put a sample of a few of them below.
They all seem to be related to ioat.

I had the same issue with Tom's v2 but never saw this on his v1.


Have you verified whether this could be reproduced with the latest
upstream kernel (without this patch series)?

Best regards,
baolu


Re: [Intel-gfx] [PATCH v3 0/6] Convert the intel iommu driver to the dma-iommu api

2020-09-14 Thread Lu Baolu

Hi Tvrtko,

On 9/14/20 4:04 PM, Tvrtko Ursulin wrote:


Hi,

On 12/09/2020 04:21, Lu Baolu wrote:

Tom Murphy has almost done all the work. His latest patch series was
posted here.

https://lore.kernel.org/linux-iommu/20200903201839.7327-1-murph...@tcd.ie/

Thanks a lot!

This series is a follow-up with below changes:

1. Add a quirk for the i915 driver issue described in Tom's cover
letter.


Last week I copied you on an i915 series which appears to remove the need
for this quirk. So if we get those i915 patches reviewed and merged, do you
still want to pursue this quirk?


It's up to the graphics guys. I don't know the details of the i915 driver.
I don't think my tests could cover all cases.




2. Fix several bugs in patch "iommu: Allow the dma-iommu api to use
bounce buffers" to make the bounce buffer work for untrusted devices.
3. Several cleanups in iommu/vt-d driver after the conversion.


With the previous version of the series I hit a problem on Ivybridge where 
apparently the dma engine width is not respected. At least that is my layman 
interpretation of the errors. From the older thread:

<3> [209.526605] DMAR: intel_iommu_map: iommu width (39) is not sufficient for 
the mapped address (008000)

Relevant iommu boot related messages are:

<6>[0.184234] DMAR: Host address width 36
<6>[0.184245] DMAR: DRHD base: 0x00fed9 flags: 0x0
<6>[0.184288] DMAR: dmar0: reg_base_addr fed9 ver 1:0 cap 
c020e60262 ecap f0101a
<6>[0.184308] DMAR: DRHD base: 0x00fed91000 flags: 0x1
<6>[0.184337] DMAR: dmar1: reg_base_addr fed91000 ver 1:0 cap 
c9008020660262 ecap f0105a
<6>[0.184357] DMAR: RMRR base: 0x00d8d28000 end: 0x00d8d46fff
<6>[0.184377] DMAR: RMRR base: 0x00db00 end: 0x00df1f
<6>[0.184398] DMAR-IR: IOAPIC id 2 under DRHD base  0xfed91000 IOMMU 1
<6>[0.184414] DMAR-IR: HPET id 0 under DRHD base 0xfed91000
<6>[0.184428] DMAR-IR: Queued invalidation will be enabled to support 
x2apic and Intr-remapping.
<6>[0.185173] DMAR-IR: Enabled IRQ remapping in x2apic mode

<6>[0.878934] DMAR: No ATSR found
<6>[0.878966] DMAR: dmar0: Using Queued invalidation
<6>[0.879007] DMAR: dmar1: Using Queued invalidation

<6>[0.915032] DMAR: Intel(R) Virtualization Technology for Directed I/O
<6>[0.915060] PCI-DMA: Using software bounce buffering for IO (SWIOTLB)
<6>[0.915084] software IO TLB: mapped [mem 0xc80d4000-0xcc0d4000] (64MB)

(Full boot log at 
https://intel-gfx-ci.01.org/tree/drm-tip/Trybot_7054/fi-ivb-3770/boot0.txt, 
failures at 
https://intel-gfx-ci.01.org/tree/drm-tip/Trybot_7054/fi-ivb-3770/igt@i915_selftest@l...@blt.html.)

Does this look familiar or at least plausible to you? Is this something your 
new series has fixed?


This happens during attaching a domain to device. It has nothing to do
with this patch series. I will look into this issue, but not in this
email thread context.

Best regards,
baolu



Regards,

Tvrtko



Please review and test.

Best regards,
baolu

Lu Baolu (2):
iommu: Add quirk for Intel graphic devices in map_sg
iommu/vt-d: Cleanup after converting to dma-iommu ops

Tom Murphy (4):
iommu: Handle freelists when using deferred flushing in iommu drivers
iommu: Add iommu_dma_free_cpu_cached_iovas()
iommu: Allow the dma-iommu api to use bounce buffers
iommu/vt-d: Convert intel iommu driver to the iommu ops

   .../admin-guide/kernel-parameters.txt |   5 -
   drivers/iommu/dma-iommu.c | 229 -
   drivers/iommu/intel/Kconfig   |   1 +
   drivers/iommu/intel/iommu.c   | 885 +++---
   include/linux/dma-iommu.h |   8 +
   include/linux/iommu.h |   1 +
   6 files changed, 323 insertions(+), 806 deletions(-)




[Intel-gfx] [PATCH v3 4/6] iommu: Add quirk for Intel graphic devices in map_sg

2020-09-11 Thread Lu Baolu
Combining the sg segments exposes a bug in the Intel i915 driver which
causes visual artifacts and the screen to freeze. This is most likely
because of how the i915 handles the returned list. It probably doesn't
respect the returned value specifying the number of elements in the list
and instead depends on the previous behaviour of the Intel iommu driver
which would return the same number of elements in the output list as in
the input list.

Signed-off-by: Tom Murphy 
Signed-off-by: Lu Baolu 
---
 drivers/iommu/dma-iommu.c | 27 +++
 1 file changed, 27 insertions(+)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 1a1da22e5a5e..fc19f1fb9413 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -880,6 +880,33 @@ static int __finalise_sg(struct device *dev, struct 
scatterlist *sg, int nents,
unsigned int cur_len = 0, max_len = dma_get_max_seg_size(dev);
int i, count = 0;
 
+   /*
+* The Intel graphics driver assumes that the returned sg list is
+* not combined. This blocks the effort of converting the Intel
+* IOMMU driver to the dma-iommu APIs. Add this quirk to keep the
+* device driver working; it should be removed once the i915
+* driver is fixed.
+*/
+   if (IS_ENABLED(CONFIG_DRM_I915) && dev_is_pci(dev) &&
+   to_pci_dev(dev)->vendor == PCI_VENDOR_ID_INTEL &&
+   (to_pci_dev(dev)->class >> 16) == PCI_BASE_CLASS_DISPLAY) {
+   for_each_sg(sg, s, nents, i) {
+   unsigned int s_iova_off = sg_dma_address(s);
+   unsigned int s_length = sg_dma_len(s);
+   unsigned int s_iova_len = s->length;
+
+   s->offset += s_iova_off;
+   s->length = s_length;
+   sg_dma_address(s) = dma_addr + s_iova_off;
+   sg_dma_len(s) = s_length;
+   dma_addr += s_iova_len;
+
+   pr_info_once("sg combining disabled due to i915 
driver\n");
+   }
+
+   return nents;
+   }
+
for_each_sg(sg, s, nents, i) {
/* Restore this segment's original unaligned fields first */
unsigned int s_iova_off = sg_dma_address(s);
-- 
2.17.1



[Intel-gfx] [PATCH v3 6/6] iommu/vt-d: Cleanup after converting to dma-iommu ops

2020-09-11 Thread Lu Baolu
Some cleanups after converting the driver to use dma-iommu ops.
- Remove nobounce option;
- Cleanup and simplify the path in domain mapping.

Signed-off-by: Lu Baolu 
---
 .../admin-guide/kernel-parameters.txt |  5 --
 drivers/iommu/intel/iommu.c   | 90 ++-
 2 files changed, 28 insertions(+), 67 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt 
b/Documentation/admin-guide/kernel-parameters.txt
index a1068742a6df..0d11ef43d314 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1866,11 +1866,6 @@
Note that using this option lowers the security
provided by tboot because it makes the system
vulnerable to DMA attacks.
-   nobounce [Default off]
-   Disable bounce buffer for untrusted devices such as
-   the Thunderbolt devices. This will treat the untrusted
-   devices as the trusted ones, hence might expose security
-   risks of DMA attacks.
 
intel_idle.max_cstate=  [KNL,HW,ACPI,X86]
0   disables intel_idle and fall back on acpi_idle.
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index adc231790e0a..fe2544c95013 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -355,7 +355,6 @@ static int dmar_forcedac;
 static int intel_iommu_strict;
 static int intel_iommu_superpage = 1;
 static int iommu_identity_mapping;
-static int intel_no_bounce;
 static int iommu_skip_te_disable;
 
 #define IDENTMAP_GFX   2
@@ -457,9 +456,6 @@ static int __init intel_iommu_setup(char *str)
} else if (!strncmp(str, "tboot_noforce", 13)) {
pr_info("Intel-IOMMU: not forcing on after tboot. This 
could expose security risk for tboot\n");
intel_iommu_tboot_noforce = 1;
-   } else if (!strncmp(str, "nobounce", 8)) {
-   pr_info("Intel-IOMMU: No bounce buffer. This could 
expose security risks of DMA attacks\n");
-   intel_no_bounce = 1;
}
 
str += strcspn(str, ",");
@@ -2230,15 +2226,14 @@ static inline int hardware_largepage_caps(struct 
dmar_domain *domain,
return level;
 }
 
-static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
-   struct scatterlist *sg, unsigned long phys_pfn,
-   unsigned long nr_pages, int prot)
+static int
+__domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
+unsigned long phys_pfn, unsigned long nr_pages, int prot)
 {
struct dma_pte *first_pte = NULL, *pte = NULL;
-   phys_addr_t pteval;
-   unsigned long sg_res = 0;
unsigned int largepage_lvl = 0;
unsigned long lvl_pages = 0;
+   phys_addr_t pteval;
u64 attr;
 
BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
@@ -2250,26 +2245,14 @@ static int __domain_mapping(struct dmar_domain *domain, 
unsigned long iov_pfn,
if (domain_use_first_level(domain))
attr |= DMA_FL_PTE_PRESENT | DMA_FL_PTE_XD | DMA_FL_PTE_US;
 
-   if (!sg) {
-   sg_res = nr_pages;
-   pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | attr;
-   }
+   pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | attr;
 
while (nr_pages > 0) {
uint64_t tmp;
 
-   if (!sg_res) {
-   unsigned int pgoff = sg->offset & ~PAGE_MASK;
-
-   sg_res = aligned_nrpages(sg->offset, sg->length);
-   sg->dma_address = ((dma_addr_t)iov_pfn << 
VTD_PAGE_SHIFT) + pgoff;
-   sg->dma_length = sg->length;
-   pteval = (sg_phys(sg) - pgoff) | attr;
-   phys_pfn = pteval >> VTD_PAGE_SHIFT;
-   }
-
if (!pte) {
-   largepage_lvl = hardware_largepage_caps(domain, 
iov_pfn, phys_pfn, sg_res);
+   largepage_lvl = hardware_largepage_caps(domain, iov_pfn,
+   phys_pfn, nr_pages);
 
first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, 
_lvl);
if (!pte)
@@ -2281,7 +2264,7 @@ static int __domain_mapping(struct dmar_domain *domain, 
unsigned long iov_pfn,
pteval |= DMA_PTE_LARGE_PAGE;
lvl_pages = lvl_to_nr_pages(largepage_lvl);
 
-   nr_superpages = sg_res / lvl_pages;
+   nr_superpages = nr_pages / lvl_pages;
end_pfn = 

[Intel-gfx] [PATCH v3 2/6] iommu: Add iommu_dma_free_cpu_cached_iovas()

2020-09-11 Thread Lu Baolu
From: Tom Murphy 

Add an iommu_dma_free_cpu_cached_iovas() function to allow drivers that
use the dma-iommu ops to free their cached CPU IOVAs.

Signed-off-by: Tom Murphy 
Signed-off-by: Lu Baolu 
---
 drivers/iommu/dma-iommu.c | 9 +
 include/linux/dma-iommu.h | 8 
 2 files changed, 17 insertions(+)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 82c071b2d5c8..d06411bd5e08 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -50,6 +50,15 @@ struct iommu_dma_cookie {
struct iommu_domain *fq_domain;
 };
 
+void iommu_dma_free_cpu_cached_iovas(unsigned int cpu,
+   struct iommu_domain *domain)
+{
+   struct iommu_dma_cookie *cookie = domain->iova_cookie;
+   struct iova_domain *iovad = &cookie->iovad;
+
+   free_cpu_cached_iovas(cpu, iovad);
+}
+
 static void iommu_dma_entry_dtor(unsigned long data)
 {
struct page *freelist = (struct page *)data;
diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h
index 2112f21f73d8..706b68d1359b 100644
--- a/include/linux/dma-iommu.h
+++ b/include/linux/dma-iommu.h
@@ -37,6 +37,9 @@ void iommu_dma_compose_msi_msg(struct msi_desc *desc,
 
 void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list);
 
+void iommu_dma_free_cpu_cached_iovas(unsigned int cpu,
+   struct iommu_domain *domain);
+
 #else /* CONFIG_IOMMU_DMA */
 
 struct iommu_domain;
@@ -78,5 +81,10 @@ static inline void iommu_dma_get_resv_regions(struct device 
*dev, struct list_he
 {
 }
 
+static inline void iommu_dma_free_cpu_cached_iovas(unsigned int cpu,
+   struct iommu_domain *domain)
+{
+}
+
 #endif /* CONFIG_IOMMU_DMA */
 #endif /* __DMA_IOMMU_H */
-- 
2.17.1



[Intel-gfx] [PATCH v3 5/6] iommu/vt-d: Convert intel iommu driver to the iommu ops

2020-09-11 Thread Lu Baolu
From: Tom Murphy 

Convert the intel iommu driver to the dma-iommu api. Remove the iova
handling and reserve region code from the intel iommu driver.

Signed-off-by: Tom Murphy 
Signed-off-by: Lu Baolu 
---
 drivers/iommu/intel/Kconfig |   1 +
 drivers/iommu/intel/iommu.c | 742 ++--
 2 files changed, 43 insertions(+), 700 deletions(-)

diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig
index 5337ee1584b0..28a3d1596c76 100644
--- a/drivers/iommu/intel/Kconfig
+++ b/drivers/iommu/intel/Kconfig
@@ -13,6 +13,7 @@ config INTEL_IOMMU
select DMAR_TABLE
select SWIOTLB
select IOASID
+   select IOMMU_DMA
help
  DMA remapping (DMAR) devices support enables independent address
  translations for Direct Memory Access (DMA) from devices.
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 63ee30c689a7..adc231790e0a 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -31,6 +31,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -41,7 +42,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -382,9 +382,6 @@ struct device_domain_info *get_domain_info(struct device 
*dev)
 DEFINE_SPINLOCK(device_domain_lock);
 static LIST_HEAD(device_domain_list);
 
-#define device_needs_bounce(d) (!intel_no_bounce && dev_is_pci(d) &&   \
-   to_pci_dev(d)->untrusted)
-
 /*
  * Iterate over elements in device_domain_list and call the specified
  * callback @fn against each element.
@@ -1242,13 +1239,6 @@ static void dma_free_pagelist(struct page *freelist)
}
 }
 
-static void iova_entry_free(unsigned long data)
-{
-   struct page *freelist = (struct page *)data;
-
-   dma_free_pagelist(freelist);
-}
-
 /* iommu handling */
 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
 {
@@ -1613,19 +1603,17 @@ static inline void __mapping_notify_one(struct 
intel_iommu *iommu,
iommu_flush_write_buffer(iommu);
 }
 
-static void iommu_flush_iova(struct iova_domain *iovad)
+static void intel_flush_iotlb_all(struct iommu_domain *domain)
 {
-   struct dmar_domain *domain;
+   struct dmar_domain *dmar_domain = to_dmar_domain(domain);
int idx;
 
-   domain = container_of(iovad, struct dmar_domain, iovad);
-
-   for_each_domain_iommu(idx, domain) {
+   for_each_domain_iommu(idx, dmar_domain) {
struct intel_iommu *iommu = g_iommus[idx];
-   u16 did = domain->iommu_did[iommu->seq_id];
+   u16 did = dmar_domain->iommu_did[iommu->seq_id];
 
-   if (domain_use_first_level(domain))
-   domain_flush_piotlb(iommu, domain, 0, -1, 0);
+   if (domain_use_first_level(dmar_domain))
+   domain_flush_piotlb(iommu, dmar_domain, 0, -1, 0);
else
iommu->flush.flush_iotlb(iommu, did, 0, 0,
 DMA_TLB_DSI_FLUSH);
@@ -1907,48 +1895,6 @@ static int domain_detach_iommu(struct dmar_domain 
*domain,
return count;
 }
 
-static struct iova_domain reserved_iova_list;
-static struct lock_class_key reserved_rbtree_key;
-
-static int dmar_init_reserved_ranges(void)
-{
-   struct pci_dev *pdev = NULL;
-   struct iova *iova;
-   int i;
-
-   init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN);
-
-   lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
-   &reserved_rbtree_key);
-
-   /* IOAPIC ranges shouldn't be accessed by DMA */
-   iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
-   IOVA_PFN(IOAPIC_RANGE_END));
-   if (!iova) {
-   pr_err("Reserve IOAPIC range failed\n");
-   return -ENODEV;
-   }
-
-   /* Reserve all PCI MMIO to avoid peer-to-peer access */
-   for_each_pci_dev(pdev) {
-   struct resource *r;
-
-   for (i = 0; i < PCI_NUM_RESOURCES; i++) {
-   r = &pdev->resource[i];
-   if (!r->flags || !(r->flags & IORESOURCE_MEM))
-   continue;
-   iova = reserve_iova(&reserved_iova_list,
-   IOVA_PFN(r->start),
-   IOVA_PFN(r->end));
-   if (!iova) {
-   pci_err(pdev, "Reserve iova for %pR failed\n", 
r);
-   return -ENODEV;
-   }
-   }
-   }
-   return 0;
-}
-
 static inline int guestwidth_to_adjustwidth(int gaw)
 {
int agaw;
@@ -1971,7 +1917,7 @@ static void domain_exit(struct dmar_domain *domain)
 
/* destroy iovas */
if (domain->domain.type == IOMMU_DOMAIN_DMA)
- 

[Intel-gfx] [PATCH v3 0/6] Convert the intel iommu driver to the dma-iommu api

2020-09-11 Thread Lu Baolu
Tom Murphy has almost done all the work. His latest patch series was
posted here.

https://lore.kernel.org/linux-iommu/20200903201839.7327-1-murph...@tcd.ie/

Thanks a lot!

This series is a follow-up with below changes:

1. Add a quirk for the i915 driver issue described in Tom's cover
letter.
2. Fix several bugs in patch "iommu: Allow the dma-iommu api to use
bounce buffers" to make the bounce buffer work for untrusted devices.
3. Several cleanups in iommu/vt-d driver after the conversion.

Please review and test.

Best regards,
baolu

Lu Baolu (2):
  iommu: Add quirk for Intel graphic devices in map_sg
  iommu/vt-d: Cleanup after converting to dma-iommu ops

Tom Murphy (4):
  iommu: Handle freelists when using deferred flushing in iommu drivers
  iommu: Add iommu_dma_free_cpu_cached_iovas()
  iommu: Allow the dma-iommu api to use bounce buffers
  iommu/vt-d: Convert intel iommu driver to the iommu ops

 .../admin-guide/kernel-parameters.txt |   5 -
 drivers/iommu/dma-iommu.c | 229 -
 drivers/iommu/intel/Kconfig   |   1 +
 drivers/iommu/intel/iommu.c   | 885 +++---
 include/linux/dma-iommu.h |   8 +
 include/linux/iommu.h |   1 +
 6 files changed, 323 insertions(+), 806 deletions(-)

-- 
2.17.1



[Intel-gfx] [PATCH v3 3/6] iommu: Allow the dma-iommu api to use bounce buffers

2020-09-11 Thread Lu Baolu
From: Tom Murphy 

Allow the dma-iommu api to use bounce buffers for untrusted devices.
This is a copy of the intel bounce buffer code.

Signed-off-by: Tom Murphy 
Co-developed-by: Lu Baolu 
Signed-off-by: Lu Baolu 
---
 drivers/iommu/dma-iommu.c | 163 +++---
 1 file changed, 150 insertions(+), 13 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index d06411bd5e08..1a1da22e5a5e 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -21,9 +21,11 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
+#include 
 
 struct iommu_dma_msi_page {
struct list_headlist;
@@ -498,6 +500,31 @@ static void __iommu_dma_unmap(struct device *dev, 
dma_addr_t dma_addr,
iommu_dma_free_iova(cookie, dma_addr, size, iotlb_gather.freelist);
 }
 
+static void __iommu_dma_unmap_swiotlb(struct device *dev, dma_addr_t dma_addr,
+   size_t size, enum dma_data_direction dir,
+   unsigned long attrs)
+{
+   struct iommu_domain *domain = iommu_get_dma_domain(dev);
+   struct iommu_dma_cookie *cookie = domain->iova_cookie;
+   struct iova_domain *iovad = &cookie->iovad;
+   phys_addr_t phys;
+
+   phys = iommu_iova_to_phys(domain, dma_addr);
+   if (WARN_ON(!phys))
+   return;
+
+   __iommu_dma_unmap(dev, dma_addr, size);
+
+   if (unlikely(is_swiotlb_buffer(phys)))
+   swiotlb_tbl_unmap_single(dev, phys, size,
+   iova_align(iovad, size), dir, attrs);
+}
+
+static bool dev_is_untrusted(struct device *dev)
+{
+   return dev_is_pci(dev) && to_pci_dev(dev)->untrusted;
+}
+
 static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
size_t size, int prot, u64 dma_mask)
 {
@@ -523,6 +550,55 @@ static dma_addr_t __iommu_dma_map(struct device *dev, 
phys_addr_t phys,
return iova + iova_off;
 }
 
+static dma_addr_t __iommu_dma_map_swiotlb(struct device *dev, phys_addr_t phys,
+   size_t org_size, dma_addr_t dma_mask, bool coherent,
+   enum dma_data_direction dir, unsigned long attrs)
+{
+   int prot = dma_info_to_prot(dir, coherent, attrs);
+   struct iommu_domain *domain = iommu_get_dma_domain(dev);
+   struct iommu_dma_cookie *cookie = domain->iova_cookie;
+   struct iova_domain *iovad = >iovad;
+   size_t aligned_size = org_size;
+   void *padding_start;
+   size_t padding_size;
+   dma_addr_t iova;
+
+   /*
+* If both the physical buffer start address and size are
+* page aligned, we don't need to use a bounce page.
+*/
+   if (IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev) &&
+   iova_offset(iovad, phys | org_size)) {
+   aligned_size = iova_align(iovad, org_size);
+   phys = swiotlb_tbl_map_single(dev,
+   __phys_to_dma(dev, io_tlb_start),
+   phys, org_size, aligned_size, dir, attrs);
+
+   if (phys == DMA_MAPPING_ERROR)
+   return DMA_MAPPING_ERROR;
+
+   /* Cleanup the padding area. */
+   padding_start = phys_to_virt(phys);
+   padding_size = aligned_size;
+
+   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
+   (dir == DMA_TO_DEVICE ||
+dir == DMA_BIDIRECTIONAL)) {
+   padding_start += org_size;
+   padding_size -= org_size;
+   }
+
+   memset(padding_start, 0, padding_size);
+   }
+
+   iova = __iommu_dma_map(dev, phys, aligned_size, prot, dma_mask);
+   if ((iova == DMA_MAPPING_ERROR) && is_swiotlb_buffer(phys))
+   swiotlb_tbl_unmap_single(dev, phys, org_size,
+   aligned_size, dir, attrs);
+
+   return iova;
+}
+
 static void __iommu_dma_free_pages(struct page **pages, int count)
 {
while (count--)
@@ -698,11 +774,15 @@ static void iommu_dma_sync_single_for_cpu(struct device 
*dev,
 {
phys_addr_t phys;
 
-   if (dev_is_dma_coherent(dev))
+   if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
return;
 
phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
-   arch_sync_dma_for_cpu(phys, size, dir);
+   if (!dev_is_dma_coherent(dev))
+   arch_sync_dma_for_cpu(phys, size, dir);
+
+   if (is_swiotlb_buffer(phys))
+   swiotlb_tbl_sync_single(dev, phys, size, dir, SYNC_FOR_CPU);
 }
 
 static void iommu_dma_sync_single_for_device(struct device *dev,
@@ -710,11 +790,15 @@ static void iommu_dma_sync_single_for_device(struct 
device *dev,
 {
phys_addr_t phys;
 
-   if (dev_is_dma_coherent(dev))
+   if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
return

[Intel-gfx] [PATCH v3 1/6] iommu: Handle freelists when using deferred flushing in iommu drivers

2020-09-11 Thread Lu Baolu
From: Tom Murphy 

Allow iommu_unmap_fast() to return newly freed page table pages and
pass the freelist to queue_iova() in the dma-iommu ops path.

This is useful for IOMMU drivers (in this case the Intel IOMMU driver)
which need to wait for the IOTLB to be flushed before newly
freed/unmapped page table pages can be released. This way we can still
batch IOTLB free operations and handle the freelists.
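As an illustration (hypothetical driver-side code; the actual VT-d changes are
in the diff below), an IOMMU driver chains the page-table pages it unlinks
during unmap through page->freelist and hands the list back via the
iotlb_gather, so that dma-iommu only frees them once the deferred IOTLB flush
has run:

#include <linux/iommu.h>
#include <linux/mm.h>

/* Hypothetical: add a no-longer-referenced page-table page to a freelist. */
static struct page *example_freelist_add(void *pt_vaddr, struct page *freelist)
{
	struct page *pg = virt_to_page(pt_vaddr);

	pg->freelist = freelist;
	return pg;
}

/* Hypothetical unmap path: report the collected pages to the core. */
static void example_report_freelist(struct iommu_iotlb_gather *gather,
				    struct page *freelist)
{
	gather->freelist = freelist;
}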

Signed-off-by: Tom Murphy 
Signed-off-by: Lu Baolu 
---
 drivers/iommu/dma-iommu.c   | 30 ++--
 drivers/iommu/intel/iommu.c | 55 -
 include/linux/iommu.h   |  1 +
 3 files changed, 59 insertions(+), 27 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 5141d49a046b..82c071b2d5c8 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -50,6 +50,18 @@ struct iommu_dma_cookie {
struct iommu_domain *fq_domain;
 };
 
+static void iommu_dma_entry_dtor(unsigned long data)
+{
+   struct page *freelist = (struct page *)data;
+
+   while (freelist) {
+   unsigned long p = (unsigned long)page_address(freelist);
+
+   freelist = freelist->freelist;
+   free_page(p);
+   }
+}
+
 static inline size_t cookie_msi_granule(struct iommu_dma_cookie *cookie)
 {
if (cookie->type == IOMMU_DMA_IOVA_COOKIE)
@@ -344,7 +356,8 @@ static int iommu_dma_init_domain(struct iommu_domain 
*domain, dma_addr_t base,
if (!cookie->fq_domain && !iommu_domain_get_attr(domain,
DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, &attr) && attr) {
cookie->fq_domain = domain;
-   init_iova_flush_queue(iovad, iommu_dma_flush_iotlb_all, NULL);
+   init_iova_flush_queue(iovad, iommu_dma_flush_iotlb_all,
+ iommu_dma_entry_dtor);
}
 
if (!dev)
@@ -438,7 +451,7 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain 
*domain,
 }
 
 static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
-   dma_addr_t iova, size_t size)
+   dma_addr_t iova, size_t size, struct page *freelist)
 {
struct iova_domain *iovad = &cookie->iovad;
 
@@ -447,7 +460,8 @@ static void iommu_dma_free_iova(struct iommu_dma_cookie 
*cookie,
cookie->msi_iova -= size;
else if (cookie->fq_domain) /* non-strict mode */
queue_iova(iovad, iova_pfn(iovad, iova),
-   size >> iova_shift(iovad), 0);
+   size >> iova_shift(iovad),
+   (unsigned long)freelist);
else
free_iova_fast(iovad, iova_pfn(iovad, iova),
size >> iova_shift(iovad));
@@ -472,7 +486,7 @@ static void __iommu_dma_unmap(struct device *dev, 
dma_addr_t dma_addr,
 
if (!cookie->fq_domain)
iommu_tlb_sync(domain, &iotlb_gather);
-   iommu_dma_free_iova(cookie, dma_addr, size);
+   iommu_dma_free_iova(cookie, dma_addr, size, iotlb_gather.freelist);
 }
 
 static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
@@ -494,7 +508,7 @@ static dma_addr_t __iommu_dma_map(struct device *dev, 
phys_addr_t phys,
return DMA_MAPPING_ERROR;
 
if (iommu_map_atomic(domain, iova, phys - iova_off, size, prot)) {
-   iommu_dma_free_iova(cookie, iova, size);
+   iommu_dma_free_iova(cookie, iova, size, NULL);
return DMA_MAPPING_ERROR;
}
return iova + iova_off;
@@ -649,7 +663,7 @@ static void *iommu_dma_alloc_remap(struct device *dev, 
size_t size,
 out_free_sg:
sg_free_table(&sgt);
 out_free_iova:
-   iommu_dma_free_iova(cookie, iova, size);
+   iommu_dma_free_iova(cookie, iova, size, NULL);
 out_free_pages:
__iommu_dma_free_pages(pages, count);
return NULL;
@@ -900,7 +914,7 @@ static int iommu_dma_map_sg(struct device *dev, struct 
scatterlist *sg,
return __finalise_sg(dev, sg, nents, iova);
 
 out_free_iova:
-   iommu_dma_free_iova(cookie, iova, iova_len);
+   iommu_dma_free_iova(cookie, iova, iova_len, NULL);
 out_restore_sg:
__invalidate_sg(sg, nents);
return 0;
@@ -1194,7 +1208,7 @@ static struct iommu_dma_msi_page 
*iommu_dma_get_msi_page(struct device *dev,
return msi_page;
 
 out_free_iova:
-   iommu_dma_free_iova(cookie, iova, size);
+   iommu_dma_free_iova(cookie, iova, size, NULL);
 out_free_page:
kfree(msi_page);
return NULL;
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 87b17bac04c2..63ee30c689a7 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -1208,17 +1208,17 @@ static struct page *dma_pte_clear_level(struct 
dmar_domain *domain, int level,
pages can only be freed after the IOTLB flush has been done. */
 s

Re: [Intel-gfx] [PATCH V2 5/5] DO NOT MERGE: iommu: disable list appending in dma-iommu

2020-09-11 Thread Lu Baolu

On 2020/9/9 15:06, Christoph Hellwig wrote:

On Wed, Sep 09, 2020 at 09:43:09AM +0800, Lu Baolu wrote:

+   /*
+* The Intel graphic device driver is used to assume that the
returned
+* sg list is not combound. This blocks the efforts of converting
the


This adds pointless overly long lines.


+* Intel IOMMU driver to dma-iommu api's. Add this quirk to make the
+* device driver work and should be removed once it's fixed in i915
+* driver.
+*/
+   if (dev_is_pci(dev) &&
+   to_pci_dev(dev)->vendor == PCI_VENDOR_ID_INTEL &&
+   (to_pci_dev(dev)->class >> 16) == PCI_BASE_CLASS_DISPLAY) {
+   for_each_sg(sg, s, nents, i) {
+   unsigned int s_iova_off = sg_dma_address(s);
+   unsigned int s_length = sg_dma_len(s);
+   unsigned int s_iova_len = s->length;
+
+   s->offset += s_iova_off;
+   s->length = s_length;
+   sg_dma_address(s) = dma_addr + s_iova_off;
+   sg_dma_len(s) = s_length;
+   dma_addr += s_iova_len;
+   }
+
+   return nents;
+   }


This wants an IS_ENABLED() check.  And probably a pr_once reminding
of the workaround.



Will fix in the next version.

Best regards,
baolu


Re: [Intel-gfx] [PATCH V2 5/5] DO NOT MERGE: iommu: disable list appending in dma-iommu

2020-09-08 Thread Lu Baolu

Hi Christoph,

On 9/8/20 2:23 PM, Christoph Hellwig wrote:

On Tue, Sep 08, 2020 at 02:04:53PM +0800, Lu Baolu wrote:

Do you mind telling me where I can find Marek's series?


[PATCH v10 00/30] DRM: fix struct sg_table nents vs. orig_nents misuse

on various lists including the iommu one.



It seems that more work is needed in the i915 driver. I will add the below
quirk as you suggested.

--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -851,6 +851,31 @@ static int __finalise_sg(struct device *dev, struct 
scatterlist *sg, int nents,

unsigned int cur_len = 0, max_len = dma_get_max_seg_size(dev);
int i, count = 0;

+   /*
+* The Intel graphic device driver is used to assume that the 
returned
+* sg list is not combound. This blocks the efforts of 
converting the

+* Intel IOMMU driver to dma-iommu api's. Add this quirk to make the
+* device driver work and should be removed once it's fixed in i915
+* driver.
+*/
+   if (dev_is_pci(dev) &&
+   to_pci_dev(dev)->vendor == PCI_VENDOR_ID_INTEL &&
+   (to_pci_dev(dev)->class >> 16) == PCI_BASE_CLASS_DISPLAY) {
+   for_each_sg(sg, s, nents, i) {
+   unsigned int s_iova_off = sg_dma_address(s);
+   unsigned int s_length = sg_dma_len(s);
+   unsigned int s_iova_len = s->length;
+
+   s->offset += s_iova_off;
+   s->length = s_length;
+   sg_dma_address(s) = dma_addr + s_iova_off;
+   sg_dma_len(s) = s_length;
+   dma_addr += s_iova_len;
+   }
+
+   return nents;
+   }
+

Best regards,
baolu


Re: [Intel-gfx] [PATCH V2 5/5] DO NOT MERGE: iommu: disable list appending in dma-iommu

2020-09-08 Thread Lu Baolu

On 2020/9/8 14:23, Christoph Hellwig wrote:

On Tue, Sep 08, 2020 at 02:04:53PM +0800, Lu Baolu wrote:

Do you mind telling me where I can find Marek's series?


[PATCH v10 00/30] DRM: fix struct sg_table nents vs. orig_nents misuse

on various lists including the iommu one.



Get it. Thank you!

Best regards,
baolu


Re: [Intel-gfx] [PATCH V2 5/5] DO NOT MERGE: iommu: disable list appending in dma-iommu

2020-09-08 Thread Lu Baolu

Hi Christoph,

On 9/8/20 1:55 PM, Christoph Hellwig wrote:

On Tue, Sep 08, 2020 at 06:36:19AM +0100, Christoph Hellwig wrote:

On Mon, Sep 07, 2020 at 09:18:50PM +0100, Tom Murphy wrote:

Yeah we talked about passing an attr to map_sg to disable merging at
the following microconference:
https://linuxplumbersconf.org/event/7/contributions/846/
As far as I can remember everyone seemed happy with that solution. I
won't be working on this though as I don't have any more time to
dedicate to this. It seems Lu Baolu will take over this.


I'm absolutely against passing a flag.  That just invites further
abuse.  We need a PCI ID based quirk or something else that can't
be as easily abused.


Also, I looked at i915 and there are just three dma_map_sg callers.
The two dmabuf related ones are fixed by Marek in his series, leaving


Do you mind telling me where I can find Marek's series?

Best regards,
baolu


just the one in i915_gem_gtt_prepare_pages, which does indeed look
very fishy.  But if that one is so hard to fix it can just be replaced
by an open coded for_each_sg loop that contains manual dma_map_page
calls.
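A rough sketch of what such an open-coded loop could look like (hypothetical,
untested, and not an actual i915 patch):

#include <linux/dma-mapping.h>
#include <linux/scatterlist.h>

/* Rough sketch: map each sg entry individually with dma_map_page(). */
static int example_map_sg_open_coded(struct device *dev,
				     struct scatterlist *sgl, int nents)
{
	struct scatterlist *sg;
	int i, mapped = 0;

	for_each_sg(sgl, sg, nents, i) {
		sg->dma_address = dma_map_page(dev, sg_page(sg), sg->offset,
					       sg->length, DMA_BIDIRECTIONAL);
		if (dma_mapping_error(dev, sg->dma_address))
			goto err;
		sg_dma_len(sg) = sg->length;
		mapped++;
	}
	return 0;

err:
	/* Unwind only the segments that were successfully mapped. */
	for_each_sg(sgl, sg, mapped, i)
		dma_unmap_page(dev, sg_dma_address(sg), sg_dma_len(sg),
			       DMA_BIDIRECTIONAL);
	return -ENOMEM;
}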


Re: [Intel-gfx] [PATCH] iommu/intel: Handle 36b addressing for x86-32

2020-08-24 Thread Lu Baolu

Hi Chris,

On 2020/8/23 0:02, Chris Wilson wrote:

Beware that the address size for x86-32 may exceed unsigned long.

[0.368971] UBSAN: shift-out-of-bounds in drivers/iommu/intel/iommu.c:128:14
[0.369055] shift exponent 36 is too large for 32-bit type 'long unsigned 
int'
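A minimal illustration of the problem, assuming a 32-bit build where unsigned
long is 32 bits wide: shifting a 32-bit value by 36 is undefined, so the masks
have to be computed in a 64-bit type.

#include <stdint.h>

unsigned long broken_mask(void)
{
	return -1UL << 36;	/* undefined: shift count >= type width on x86-32 */
}

uint64_t fixed_mask(void)
{
	return -1ULL << 36;	/* well defined: 0xfffffff000000000 */
}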

If we don't handle the wide addresses, the pages are mismapped and the
device read/writes go astray, detected as DMAR faults and leading to
device failure. The behaviour changed (from working to broken) in commit
fa954e683178 ("iommu/vt-d: Delegate the dma domain to upper layer"), but

commit fa954e683178 ("iommu/vt-d: Delegate the dma domain to upper layer")

and adjust the title to "iommu/vt-d: Handle 36bit addressing for x86-32".

With the above two changes,

Acked-by: Lu Baolu 

Best regards,
baolu


the error looks older.

Fixes: fa954e683178 ("iommu/vt-d: Delegate the dma domain to upper layer")
Signed-off-by: Chris Wilson 
Cc: James Sewart 
Cc: Lu Baolu 
Cc: Joerg Roedel 
Cc:  # v5.3+
---
  drivers/iommu/intel/iommu.c | 14 +++---
  1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 2e9c8c3d0da4..ba78a2e854f9 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -123,29 +123,29 @@ static inline unsigned int level_to_offset_bits(int level)
	return (level - 1) * LEVEL_STRIDE;
  }

-static inline int pfn_level_offset(unsigned long pfn, int level)
+static inline int pfn_level_offset(u64 pfn, int level)
  {
	return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
  }

-static inline unsigned long level_mask(int level)
+static inline u64 level_mask(int level)
  {
-	return -1UL << level_to_offset_bits(level);
+	return -1ULL << level_to_offset_bits(level);
  }

-static inline unsigned long level_size(int level)
+static inline u64 level_size(int level)
  {
-	return 1UL << level_to_offset_bits(level);
+	return 1ULL << level_to_offset_bits(level);
  }

-static inline unsigned long align_to_level(unsigned long pfn, int level)
+static inline u64 align_to_level(u64 pfn, int level)
  {
	return (pfn + level_size(level) - 1) & level_mask(level);
  }

  static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
  {
-	return  1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
+	return 1UL << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
  }

  /* VT-d pages must always be _smaller_ than MM pages. Otherwise things
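
To spell out the failure mode from the UBSAN report above: on 32-bit x86, unsigned long is 32 bits wide, so a shift by 36 is undefined behaviour, while the widened 64-bit helpers keep the arithmetic well defined. A two-line illustration:

	/* undefined on x86-32: shift exponent 36 exceeds the 32-bit type */
	unsigned long broken = -1UL << 36;
	/* well defined: the 64-bit type can hold the shifted value */
	u64 ok = -1ULL << 36;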





Re: [Intel-gfx] [PATCH 02/13] iommu/vt-d: Use dev_iommu_priv_get/set()

2020-06-25 Thread Lu Baolu

Hi Joerg,

On 2020/6/25 21:08, Joerg Roedel wrote:

From: Joerg Roedel 

Remove the use of dev->archdata.iommu and use the private per-device
pointer provided by IOMMU core code instead.

Signed-off-by: Joerg Roedel 
---
  .../gpu/drm/i915/selftests/mock_gem_device.c   | 10 --
  drivers/iommu/intel/iommu.c| 18 +-


For changes in VT-d driver,

Reviewed-by: Lu Baolu 

Best regards,
baolu


  2 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c 
b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 9b105b811f1f..e08601905a64 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -24,6 +24,7 @@
  
  #include 

  #include 
+#include 
  
  #include 
  
@@ -118,6 +119,9 @@ struct drm_i915_private *mock_gem_device(void)

  {
struct drm_i915_private *i915;
struct pci_dev *pdev;
+#if IS_ENABLED(CONFIG_IOMMU_API) && defined(CONFIG_INTEL_IOMMU)
+   struct dev_iommu iommu;
+#endif
int err;
  
  	pdev = kzalloc(sizeof(*pdev), GFP_KERNEL);

@@ -136,8 +140,10 @@ struct drm_i915_private *mock_gem_device(void)
	dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
  
  #if IS_ENABLED(CONFIG_IOMMU_API) && defined(CONFIG_INTEL_IOMMU)

-   /* hack to disable iommu for the fake device; force identity mapping */
-   pdev->dev.archdata.iommu = (void *)-1;
+   /* HACK HACK HACK to disable iommu for the fake device; force identity mapping */
+   memset(&iommu, 0, sizeof(iommu));
+   iommu.priv = (void *)-1;
+   pdev->dev.iommu = &iommu;
  #endif
  
  	pci_set_drvdata(pdev, i915);

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index d759e7234e98..2ce490c2eab8 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -372,7 +372,7 @@ struct device_domain_info *get_domain_info(struct device 
*dev)
if (!dev)
return NULL;
  
-	info = dev->archdata.iommu;

+   info = dev_iommu_priv_get(dev);
if (unlikely(info == DUMMY_DEVICE_DOMAIN_INFO ||
 info == DEFER_DEVICE_DOMAIN_INFO))
return NULL;
@@ -743,12 +743,12 @@ struct context_entry *iommu_context_addr(struct 
intel_iommu *iommu, u8 bus,
  
  static int iommu_dummy(struct device *dev)

  {
-   return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
+   return dev_iommu_priv_get(dev) == DUMMY_DEVICE_DOMAIN_INFO;
  }
  
  static bool attach_deferred(struct device *dev)

  {
-   return dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO;
+   return dev_iommu_priv_get(dev) == DEFER_DEVICE_DOMAIN_INFO;
  }
  
  /**

@@ -2420,7 +2420,7 @@ static inline void unlink_domain_info(struct 
device_domain_info *info)
	list_del(&info->link);
	list_del(&info->global);
if (info->dev)
-   info->dev->archdata.iommu = NULL;
+   dev_iommu_priv_set(info->dev, NULL);
  }
  
  static void domain_remove_dev_info(struct dmar_domain *domain)

@@ -2453,7 +2453,7 @@ static void do_deferred_attach(struct device *dev)
  {
struct iommu_domain *domain;
  
-	dev->archdata.iommu = NULL;

+   dev_iommu_priv_set(dev, NULL);
domain = iommu_get_domain_for_dev(dev);
if (domain)
intel_iommu_attach_device(domain, dev);
@@ -2599,7 +2599,7 @@ static struct dmar_domain 
*dmar_insert_one_dev_info(struct intel_iommu *iommu,
	list_add(&info->link, &domain->devices);
	list_add(&info->global, &device_domain_list);
	if (dev)
-		dev->archdata.iommu = info;
+		dev_iommu_priv_set(dev, info);
	spin_unlock_irqrestore(&device_domain_lock, flags);
  
  	/* PASID table is mandatory for a PCI device in scalable mode. */

@@ -4004,7 +4004,7 @@ static void quirk_ioat_snb_local_iommu(struct pci_dev 
*pdev)
if (!drhd || drhd->reg_base_addr - vtbar != 0xa000) {
pr_warn_once(FW_BUG "BIOS assigned incorrect VT-d unit for Intel(R) 
QuickData Technology device\n");
add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
-   pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
+   dev_iommu_priv_set(&pdev->dev, DUMMY_DEVICE_DOMAIN_INFO);
}
  }
  DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, 
quirk_ioat_snb_local_iommu);
@@ -4043,7 +4043,7 @@ static void __init init_no_remapping_devices(void)
drhd->ignored = 1;
for_each_active_dev_scope(drhd->devices,
  drhd->devices_cnt, i, dev)
-   dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
+   dev_iommu_priv_set(dev, 
DUMMY_DEVICE_DOMAIN_INFO);
}
}
  }
@@ -5665,7 +5665,7 @@ static stru

Re: [Intel-gfx] [PATCH 3/8] iommu/vt-d: Remove IOVA handling code from non-dma_ops path

2020-03-20 Thread Lu Baolu

On 2020/3/20 14:30, Tom Murphy wrote:

Could we merge patches 1-3 from this series? It just cleans up weird
code, and merging these patches will cover some of the work needed to
move the intel iommu driver to the dma-iommu API in the future.


Can you please take a look at this patch series?

https://lkml.org/lkml/2020/3/13/1162

It probably makes this series easier.

Best regards,
baolu



On Sat, 21 Dec 2019 at 07:04, Tom Murphy  wrote:

Remove all IOVA handling code from the non-dma_ops path in the intel
iommu driver.

There's no need for the non-dma_ops path to keep track of IOVAs. The
whole point of the non-dma_ops path is that it allows the IOVAs to be
handled separately. The IOVA handling code removed in this patch is
pointless.

Signed-off-by: Tom Murphy



Re: [Intel-gfx] [PATCH 1/8] iommu/vt-d: clean up 32bit si_domain assignment

2019-12-22 Thread Lu Baolu

Hi,

On 12/21/19 11:03 PM, Tom Murphy wrote:

@@ -5618,9 +5583,13 @@ static int intel_iommu_add_device(struct device *dev)
struct iommu_domain *domain;
struct intel_iommu *iommu;
struct iommu_group *group;
+   u64 dma_mask = *dev->dma_mask;
u8 bus, devfn;
int ret;
  
+	if (dev->coherent_dma_mask && dev->coherent_dma_mask < dma_mask)

+   dma_mask = dev->coherent_dma_mask;
+
	iommu = device_to_iommu(dev, &bus, &devfn);
if (!iommu)
return -ENODEV;
@@ -5640,7 +5609,12 @@ static int intel_iommu_add_device(struct device *dev)
domain = iommu_get_domain_for_dev(dev);
dmar_domain = to_dmar_domain(domain);
if (domain->type == IOMMU_DOMAIN_DMA) {
-   if (device_def_domain_type(dev) == IOMMU_DOMAIN_IDENTITY) {
+   /*
+* We check dma_mask >= dma_get_required_mask(dev) because
+* 32 bit DMA falls back to non-identity mapping.
+*/
+   if (device_def_domain_type(dev) == IOMMU_DOMAIN_IDENTITY &&
+   dma_mask >= dma_get_required_mask(dev)) {
ret = iommu_request_dm_for_dev(dev);
if (ret) {
dmar_remove_one_dev_info(dev);


dev->dma_mask is set to 32 bits by default. During driver loading, the
driver sets the real dma_mask with dma_set_mask() according to the real
capability. Here, you will always see a 32-bit dma_mask for each device.

Best regards,
baolu
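
For context, the sequence Baolu refers to usually looks like the sketch below in a driver's probe path; the 39-bit width is just an example of a device's real capability:

	/* Illustrative only: widen the default 32-bit mask, falling back if needed. */
	if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(39)) &&
	    dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)))
		return -ENODEV;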


Re: [Intel-gfx] [RFC PATCH] iommu/vt-d: Fix IOMMU field not populated on device hot re-plug

2019-10-11 Thread Lu Baolu

Hi Janusz,

On 9/3/19 3:41 PM, Janusz Krzysztofik wrote:

Hi Baolu,

On Tuesday, September 3, 2019 3:29:40 AM CEST Lu Baolu wrote:

Hi Janusz,

On 9/2/19 4:37 PM, Janusz Krzysztofik wrote:

I am not saying that keeping data is not acceptable. I just want to
check whether there are any other solutions.

Then reverting 458b7c8e0dde and applying this patch still resolves the issue
for me.  No errors appear when mappings are unmapped on device close after the
device has been removed, and domain info preserved on device removal is
successfully reused on device re-plug.

This patch doesn't look good to me although I agree that keeping data is
acceptable. It updates dev->archdata.iommu, but leaves the hardware
context/pasid table unchanged. This might cause problems somewhere.


Is there anything else I can do to help?

Can you please tell me how to reproduce the problem?

The simplest way to reproduce the issue, assuming there are no non-Intel
graphics adapters installed, is to run the following shell commands:

#!/bin/sh
# load i915 module
modprobe i915
# open an i915 device and keep it open in background
cat /dev/dri/card0 >/dev/null &
sleep 2
# simulate device unplug
echo 1 >/sys/class/drm/card0/device/remove
# make the background process close the device on exit
kill $!



I tried to reproduce the issue with the above instructions and got the
kernel messages below. Is it the same as what you've seen? I can't find
anything explicitly related to the IOMMU except a fault generated after
the device got removed and the DMA translation structures were torn down.
Can you please help me understand how the IOMMU driver triggers the issue?


[  182.217672] [ cut here ]
[  182.217679] WARNING: CPU: 1 PID: 1285 at 
drivers/gpu/drm/drm_mode_config.c:495 drm_mode_config_cleanup+0x2cb/0x2e0
[  182.217680] Modules linked in: nls_iso8859_1 snd_soc_skl 
snd_soc_sst_ipc snd_soc_sst_dsp snd_hda_ext_core 
snd_soc_acpi_intel_match snd_soc_acpi snd_soc_core intel_rapl_msr 
snd_hda_codec_hdmi snd_hda_codec_realtek snd_compress 
snd_hda_codec_generic ledtrig_audio ac97_bus snd_pcm_dmaengine 
snd_hda_intel snd_intel_nhlt snd_hda_codec snd_hda_core snd_hwdep 
snd_pcm intel_rapl_common x86_pkg_temp_thermal intel_powerclamp coretemp 
kvm_intel kvm snd_seq_midi irqbypass snd_seq_midi_event snd_rawmidi 
crct10dif_pclmul crc32_pclmul ghash_clmulni_intel iwlmvm snd_seq 
mac80211 libarc4 aesni_intel snd_seq_device snd_timer crypto_simd 
ipu3_cio2 cryptd glue_helper iwlwifi v4l2_fwnode intel_cstate 
videobuf2_dma_sg videobuf2_memops intel_rapl_perf asix videobuf2_v4l2 
videobuf2_common usbnet mii cfg80211 input_leds serio_raw 
intel_wmi_thunderbolt snd mei_me videodev soundcore 
intel_xhci_usb_role_switch mei 8250_dw mc roles intel_pch_thermal 
hid_sensor_magn_3d hid_sensor_accel_3d hid_sensor_press
[  182.217709]  hid_sensor_incl_3d hid_sensor_als hid_sensor_rotation 
hid_sensor_gyro_3d hid_sensor_trigger industrialio_triggered_buffer 
kfifo_buf hid_sensor_iio_common industrialio intel_vbtn mac_hid 
intel_hid acpi_pad sparse_keymap sch_fq_codel parport_pc ppdev lp 
parport ip_tables x_tables hid_sensor_custom hid_sensor_hub 
intel_ishtp_hid hid_generic dwc3 ulpi udc_core i2c_designware_platform 
i2c_designware_core e1000e psmouse i2c_i801 tg3 usbhid dwc3_pci hid 
intel_ish_ipc intel_lpss_pci intel_ishtp intel_lpss wmi 
pinctrl_sunrisepoint pinctrl_intel

[  182.217727] CPU: 1 PID: 1285 Comm: bash Not tainted 5.4.0-rc2+ #2704
[  182.217728] Hardware name: Intel Corporation Skylake Client 
platform/Skylake Y LPDDR3 RVP3, BIOS SKLSE2R1.R00.B105.B00.1512051901 
12/05/2015

[  182.217731] RIP: 0010:drm_mode_config_cleanup+0x2cb/0x2e0
[  182.217732] Code: eb 0c 48 8b 70 48 4c 89 e7 e8 31 f4 ff ff 48 8d 7d 
a0 e8 d8 95 ff ff 48 85 c0 75 e6 48 8d 7d a0 e8 1a 87 ff ff e9 ef fd ff 
ff <0f> 0b e9 ed fe ff ff 0f 0b eb 98 e8 05 ec 98 ff 0f 1f 44 00 00 0f

[  182.217733] RSP: 0018:a1c481d37c78 EFLAGS: 00010216
[  182.217735] RAX: 95c7da326e08 RBX: 95c7dcf6 RCX: 
801e
[  182.217735] RDX: 95c7dcf603b8 RSI: 801e RDI: 
95c7dcf60390
[  182.217736] RBP: a1c481d37cd8 R08:  R09: 
9e035e00
[  182.217737] R10: a1c481d37be0 R11: 0001 R12: 
95c7dcf60250
[  182.217737] R13: 95c7dcf603b8 R14: a1c481d37ee8 R15: 
fff2
[  182.217739] FS:  7f804af7f740() GS:95c7e7a8() 
knlGS:

[  182.217739] CS:  0010 DS:  ES:  CR0: 80050033
[  182.217740] CR2: 7ffebc530cec CR3: 000137e0e003 CR4: 
003606e0
[  182.217741] DR0:  DR1:  DR2: 

[  182.217742] DR3:  DR6: fffe0ff0 DR7: 
0400

[  182.217742] Call Trace:
[  182.217748]  ? _cond_resched+0x19/0x40
[  182.217752]  intel_modeset_driver_remove+0xd1/0x150
[  182.217754]  i915_driver_remove+0xb8/0x120
[  182.21775

Re: [Intel-gfx] [RFC PATCH] iommu/vt-d: Fix IOMMU field not populated on device hot re-plug

2019-10-07 Thread Lu Baolu

Hi,

On 10/1/19 11:01 PM, Janusz Krzysztofik wrote:

Hi Baolu,

On Tuesday, September 3, 2019 9:41:23 AM CEST Janusz Krzysztofik wrote:

Hi Baolu,

On Tuesday, September 3, 2019 3:29:40 AM CEST Lu Baolu wrote:

Hi Janusz,

On 9/2/19 4:37 PM, Janusz Krzysztofik wrote:

I am not saying that keeping data is not acceptable. I just want to
check whether there are any other solutions.

Then reverting 458b7c8e0dde and applying this patch still resolves the issue
for me.  No errors appear when mappings are unmapped on device close after the
device has been removed, and domain info preserved on device removal is
successfully reused on device re-plug.


This patch doesn't look good to me although I agree that keeping data is
acceptable.


Any progress with that?  Which mailing list should I watch for updates?


We had a holiday last week. I will go ahead with reproducing it locally.
Feel free to let me know if you have any new proposal.

Best regards,
Baolu



Thanks,
Janusz


It updates dev->archdata.iommu, but leaves the hardware
context/pasid table unchanged. This might cause problems somewhere.



Is there anything else I can do to help?


Can you please tell me how to reproduce the problem?


The simplest way to reproduce the issue, assuming there are no non-Intel
graphics adapters installed, is to run the following shell commands:

#!/bin/sh
# load i915 module
modprobe i915
# open an i915 device and keep it open in background
cat /dev/dri/card0 >/dev/null &
sleep 2
# simulate device unplug
echo 1 >/sys/class/drm/card0/device/remove
# make the background process close the device on exit
kill $!

Thanks,
Janusz



Keeping the per
device domain info while device is unplugged is a bit dangerous because
info->dev might be a wild pointer. We need to work out a clean fix.



Thanks,
Janusz



Best regards,
Baolu















Re: [Intel-gfx] [PATCH 5/6] iommu/intel: Declare Broadwell igfx dmar support snafu

2019-09-10 Thread Lu Baolu

Hi,

On 9/9/19 7:00 PM, Chris Wilson wrote:

Despite the widespread and complete failure of Broadwell integrated
graphics when DMAR is enabled, known over the years, we have never been
able to root cause the issue. Instead, we let the failure undermine our
confidence in the iommu system itself when we should be pushing for it to
be always enabled. Quirk away Broadwell and remove the rotten apple.

References: https://bugs.freedesktop.org/show_bug.cgi?id=89360
Signed-off-by: Chris Wilson 
Cc: Lu Baolu 


This patch looks good to me.

Reviewed-by: Lu Baolu 

-baolu


Cc: Martin Peres 
Cc: Joerg Roedel 
---
  drivers/iommu/intel-iommu.c | 44 +
  1 file changed, 35 insertions(+), 9 deletions(-)

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index c4e0e4a9ee9e..34f6a3d93ae2 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -5690,20 +5690,46 @@ const struct iommu_ops intel_iommu_ops = {
.pgsize_bitmap  = INTEL_IOMMU_PGSIZES,
  };
  
-static void quirk_iommu_g4x_gfx(struct pci_dev *dev)

+static void quirk_iommu_igfx(struct pci_dev *dev)
  {
-   /* G4x/GM45 integrated gfx dmar support is totally busted. */
pci_info(dev, "Disabling IOMMU for graphics on this chipset\n");
dmar_map_gfx = 0;
  }
  
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);

-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
+/* G4x/GM45 integrated gfx dmar support is totally busted. */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_igfx);
+
+/* Broadwell igfx malfunctions with dmar */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1606, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160B, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160E, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1602, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160A, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160D, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1616, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161B, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161E, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1612, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161A, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161D, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1626, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162B, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162E, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1622, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162A, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162D, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1636, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163B, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163E, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163A, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx);
  
  static void quirk_iommu_rwbf(struct pci_dev *dev)

  {



Re: [RFC PATCH] iommu/vt-d: Fix IOMMU field not populated on device hot re-plug

2019-09-02 Thread Lu Baolu

Hi Janusz,

On 9/2/19 4:37 PM, Janusz Krzysztofik wrote:

I am not saying that keeping data is not acceptable. I just want to
check whether there are any other solutions.

Then reverting 458b7c8e0dde and applying this patch still resolves the issue
for me.  No errors appear when mappings are unmapped on device close after the
device has been removed, and domain info preserved on device removal is
successfully reused on device re-plug.


This patch doesn't look good to me although I agree that keeping data is
acceptable. It updates dev->archdata.iommu, but leaves the hardware
context/pasid table unchanged. This might cause problems somewhere.



Is there anything else I can do to help?


Can you please tell me how to reproduce the problem? Keeping the per
device domain info while device is unplugged is a bit dangerous because
info->dev might be a wild pointer. We need to work out a clean fix.



Thanks,
Janusz



Best regards,
Baolu


Re: [RFC PATCH] iommu/vt-d: Fix IOMMU field not populated on device hot re-plug

2019-08-29 Thread Lu Baolu

Hi,

On 8/29/19 3:58 PM, Janusz Krzysztofik wrote:

Hi Baolu,

On Thursday, August 29, 2019 3:43:31 AM CEST Lu Baolu wrote:

Hi Janusz,

On 8/28/19 10:17 PM, Janusz Krzysztofik wrote:

We should avoid a kernel panic when intel_unmap() is called against
a non-existent domain.

Does that mean you suggest to replace
BUG_ON(!domain);
with something like
if (WARN_ON(!domain))
return;
and to not care about orphaned mappings left allocated?  Is there a way to inform
users that their active DMA mappings are no longer valid and they shouldn't
call dma_unmap_*()?


But we shouldn't expect the IOMMU driver not
cleaning up the domain info when a device remove notification comes and
wait until all file descriptors being closed, right?

Shouldn't then the IOMMU driver take care of cleaning up resources still
allocated on device remove before it invalidates and forgets their pointers?




You are right. We need to wait until all allocated resources (iova and
mappings) to be released.

How about registering a callback for BUS_NOTIFY_UNBOUND_DRIVER, and
removing the domain info when the driver detachment completes?


Device core calls BUS_NOTIFY_UNBOUND_DRIVER on each driver unbind, regardless
of a device being removed or not.  As long as the device is not unplugged and
the BUS_NOTIFY_REMOVED_DEVICE notification not generated, an unbound driver is
not a problem here.
Moreover, BUS_NOTIFY_UNBOUND_DRIVER is called even before
BUS_NOTIFY_REMOVED_DEVICE, so that wouldn't help anyway.
Last but not least, bus events are independent of the use of the IOMMU
driver via the DMA API it exposes.


Fair enough.



If keeping data for unplugged devices and reusing it on device re-plug is not
acceptable then maybe the IOMMU driver should perform reference counting of
its internal resources occupied by DMA-API users and perform cleanups on last
release?


I am not saying that keeping data is not acceptable. I just want to
check whether there are any other solutions.

Best regards,
Baolu


Re: [Intel-gfx] [RFC PATCH] iommu/vt-d: Fix IOMMU field not populated on device hot re-plug

2019-08-28 Thread Lu Baolu

Hi Janusz,

On 8/28/19 10:17 PM, Janusz Krzysztofik wrote:

We should avoid a kernel panic when intel_unmap() is called against
a non-existent domain.

Does that mean you suggest to replace
BUG_ON(!domain);
with something like
if (WARN_ON(!domain))
return;
and to not care of orphaned mappings left allocated?  Is there a way to inform
users that their active DMA mappings are no longer valid and they shouldn't
call dma_unmap_*()?


But we shouldn't expect the IOMMU driver not
cleaning up the domain info when a device remove notification comes and
wait until all file descriptors being closed, right?

Shouldn't then the IOMMU driver take care of cleaning up resources still
allocated on device remove before it invalidates and forgets their pointers?



You are right. We need to wait until all allocated resources (iova and
mappings) to be released.

How about registering a callback for BUS_NOTIFY_UNBOUND_DRIVER, and
removing the domain info when the driver detachment completes?


Thanks,
Janusz


Best regards,
Baolu
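
As an aside, here is a minimal sketch of the kind of bus notifier suggested above; the callback body, its name and its registration point are hypothetical, and Janusz's reply (quoted earlier in this archive) explains why this approach turned out not to help:

/* Hypothetical sketch: react to driver-unbind events on the PCI bus. */
static int unbound_driver_notifier(struct notifier_block *nb,
				   unsigned long action, void *data)
{
	struct device *dev = data;

	if (action == BUS_NOTIFY_UNBOUND_DRIVER)
		dev_dbg(dev, "driver unbound; per-device domain info could be released here\n");

	return NOTIFY_OK;
}

static struct notifier_block unbound_driver_nb = {
	.notifier_call = unbound_driver_notifier,
};

/* Registered once at init time, e.g.:
 * bus_register_notifier(&pci_bus_type, &unbound_driver_nb);
 */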

Re: [RFC PATCH] iommu/vt-d: Fix IOMMU field not populated on device hot re-plug

2019-08-27 Thread Lu Baolu

Hi Janusz,

On 8/27/19 5:35 PM, Janusz Krzysztofik wrote:

Hi Lu,

On Monday, August 26, 2019 10:29:12 AM CEST Lu Baolu wrote:

Hi Janusz,

On 8/26/19 4:15 PM, Janusz Krzysztofik wrote:

Hi Lu,

On Friday, August 23, 2019 3:51:11 AM CEST Lu Baolu wrote:

Hi,

On 8/22/19 10:29 PM, Janusz Krzysztofik wrote:

When a perfectly working i915 device is hot unplugged (via sysfs) and
hot re-plugged again, its dev->archdata.iommu field is not populated
again with an IOMMU pointer.  As a result, the device probe fails on
DMA mapping error during scratch page setup.

It looks like that happens because devices are not detached from their
IOMMU bus before they are removed on device unplug.  Then, when an
already registered device/IOMMU association is identified by the
reinstantiated device's bus and function IDs on IOMMU bus re-attach
attempt, the device's archdata is not populated with IOMMU information
and the bad happens.

I'm not sure if this is a proper fix but it works for me so at least it
confirms correctness of my analysis results, I believe.  So far I
haven't been able to identify a good place where the possibly missing
IOMMU bus detach on device unplug operation could be added.


Which kernel version are you testing with? Does it contain below commit?

commit 458b7c8e0dde12d140e3472b80919cbb9ae793f4
Author: Lu Baolu 
Date:   Thu Aug 1 11:14:58 2019 +0800


I was using an internal branch based on drm-tip which didn't contain this
commit yet.  Fortunately it has been already merged into drm-tip over last
weekend and has effectively fixed the issue.


Thanks for testing this.


My testing appeared not sufficiently exhaustive. The fix indeed resolved my
initially discovered issue of not being able to rebind the i915 driver to a
re-plugged device, however it brought another, probably more serious problem
to light.

When an open i915 device is hot unplugged, IOMMU bus notifier now cleans up
IOMMU info for the device on PCI device remove while the i915 driver is still
not released, kept by open file descriptors.  Then, on last device close,
cleanup attempts lead to kernel panic raised from intel_unmap() on unresolved
IOMMU domain.


We should avoid a kernel panic when intel_unmap() is called against
a non-existent domain. But we shouldn't expect the IOMMU driver not
cleaning up the domain info when a device remove notification comes and 
wait until all file descriptors being closed, right?


Best regards,
Baolu



With commit 458b7c8e0dde reverted and my fix applied, both late device close
and device re-plug work for me.  However, I realize that's probably still
not a complete solution, possibly missing some protection against reuse of a
removed device other than for cleanup.  If you think that's the right way to
go, I can work more on that.

I've had a look at other drivers and found AMD is using a somewhat similar
approach.  On the other hand, looking at the IOMMU common code I couldn't
identify any arrangement that would support deferred device cleanup.

If that approach is not acceptable for Intel IOMMU, please suggest a way you'd
like to have it resolved and I can try to implement it.

Thanks,
Janusz


Best regards,
Lu Baolu



Re: [Intel-gfx] [RFC PATCH] iommu/vt-d: Fix IOMMU field not populated on device hot re-plug

2019-08-26 Thread Lu Baolu

Hi Janusz,

On 8/26/19 4:15 PM, Janusz Krzysztofik wrote:

Hi Lu,

On Friday, August 23, 2019 3:51:11 AM CEST Lu Baolu wrote:

Hi,

On 8/22/19 10:29 PM, Janusz Krzysztofik wrote:

When a perfectly working i915 device is hot unplugged (via sysfs) and
hot re-plugged again, its dev->archdata.iommu field is not populated
again with an IOMMU pointer.  As a result, the device probe fails on
DMA mapping error during scratch page setup.

It looks like that happens because devices are not detached from their
IOMMU bus before they are removed on device unplug.  Then, when an
already registered device/IOMMU association is identified by the
reinstantiated device's bus and function IDs on IOMMU bus re-attach
attempt, the device's archdata is not populated with IOMMU information
and the bad happens.

I'm not sure if this is a proper fix but it works for me so at least it
confirms correctness of my analysis results, I believe.  So far I
haven't been able to identify a good place where the possibly missing
IOMMU bus detach on device unplug operation could be added.


Which kernel version are you testing with? Does it contain below commit?

commit 458b7c8e0dde12d140e3472b80919cbb9ae793f4
Author: Lu Baolu 
Date:   Thu Aug 1 11:14:58 2019 +0800


I was using an internal branch based on drm-tip which didn't contain this
commit yet.  Fortunately it has been already merged into drm-tip over last
weekend and has effectively fixed the issue.


Thanks for testing this.

Best regards,
Lu Baolu

Re: [RFC PATCH] iommu/vt-d: Fix IOMMU field not populated on device hot re-plug

2019-08-22 Thread Lu Baolu

Hi,

On 8/22/19 10:29 PM, Janusz Krzysztofik wrote:

When a perfectly working i915 device is hot unplugged (via sysfs) and
hot re-plugged again, its dev->archdata.iommu field is not populated
again with an IOMMU pointer.  As a result, the device probe fails on
DMA mapping error during scratch page setup.

It looks like that happens because devices are not detached from their
IOMMU bus before they are removed on device unplug.  Then, when an
already registered device/IOMMU association is identified by the
reinstantiated device's bus and function IDs on IOMMU bus re-attach
attempt, the device's archdata is not populated with IOMMU information
and the bad happens.

I'm not sure if this is a proper fix but it works for me so at least it
confirms correctness of my analysis results, I believe.  So far I
haven't been able to identify a good place where the possibly missing
IOMMU bus detach on device unplug operation could be added.


Which kernel version are you testing with? Does it contain below commit?

commit 458b7c8e0dde12d140e3472b80919cbb9ae793f4
Author: Lu Baolu 
Date:   Thu Aug 1 11:14:58 2019 +0800

iommu/vt-d: Detach domain when move device out of group

When removing a device from an iommu group, the domain should
be detached from the device. Otherwise, the stale domain info
will still be cached by the driver and the driver will refuse
to attach any domain to the device again.

Cc: Ashok Raj 
Cc: Jacob Pan 
Cc: Kevin Tian 
Fixes: b7297783c2bb6 ("iommu/vt-d: Remove duplicated code for 
device hotplug")

Reported-and-tested-by: Vlad Buslov 
Suggested-by: Robin Murphy 
Link: https://lkml.org/lkml/2019/7/26/1133
Signed-off-by: Lu Baolu 
Signed-off-by: Joerg Roedel 

Best regards,
Lu Baolu



Signed-off-by: Janusz Krzysztofik 
---
  drivers/iommu/intel-iommu.c | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 12d094d08c0a..7cdcd0595408 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -2477,6 +2477,9 @@ static struct dmar_domain 
*dmar_insert_one_dev_info(struct intel_iommu *iommu,
if (info2) {
found  = info2->domain;
info2->dev = dev;
+
+   if (dev && !dev->archdata.iommu)
+   dev->archdata.iommu = info2;
}
}
  



Re: [Intel-gfx] IOMMU RMRR for Intel graphic device

2019-05-15 Thread Lu Baolu

Hi Daniel,

On 5/10/19 4:34 PM, Daniel Vetter wrote:

On Fri, May 10, 2019 at 01:21:59PM +0800, Lu Baolu wrote:

Forwarding to the i915 mailing list and posting the question again so people
know what the key concerns are.

The background is that the Linux community is going to collect and report
the reserved memory ranges of an assigned device to the VFIO driver, and any
mapped address will be checked against the reserved ranges. If there's
any conflict, vfio will refuse the map request.

Unfortunately, when it comes to the Intel graphic device, the conflict happens.
That means an address range mapped through vfio conflicts with the rmrr for
the graphic device.

https://lkml.org/lkml/2018/6/5/760

The questions are: 1) will the rmrr for the graphic device still need to be
reserved for BIOS or firmware use when a device is going to be assigned
to user level? 2) if no, what's the check point after which the rmrr is
unnecessary anymore?


The gfx RMRR isn't for the bios, it's for the driver. It covers stolen
memory, and we need it. There's various piles of hacks to disable use of
stolen, but they're all somewhat fragile, and afaiui for huc/guc we need
it, and huc is part of the uapi exposed by the driver/device combo on
gen9+.

Until our hw folks come up with a better design I think we're just stuck
on this, iow you can't pass-thru modern intel igfx because of this.
-Daniel



Thanks for the explanation. It's clear to me now.

Best regards,
Lu Baolu
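
For readers unfamiliar with the mechanism discussed above: the VFIO-side check amounts to walking the reserved regions the IOMMU core reports for a group and rejecting any user mapping that overlaps one of them. A rough sketch, not the actual VFIO code, using the standard iommu_get_group_resv_regions() helper:

/* Illustrative sketch: does [iova, iova + size) overlap a reserved region? */
static bool iova_conflicts_with_resv(struct iommu_group *group,
				     dma_addr_t iova, size_t size)
{
	struct iommu_resv_region *region, *next;
	LIST_HEAD(resv);
	bool conflict = false;

	if (iommu_get_group_resv_regions(group, &resv))
		return true;	/* be conservative on failure */

	list_for_each_entry(region, &resv, list) {
		if (iova < region->start + region->length &&
		    region->start < iova + size) {
			conflict = true;
			break;
		}
	}

	/* The helper allocates each entry; free them after the walk. */
	list_for_each_entry_safe(region, next, &resv, list)
		kfree(region);

	return conflict;
}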



Best regards,
Lu Baolu

On 5/9/19 5:14 PM, Zhang, Tina wrote:

Hi Baolu,

+Xiong

I think the root cause is that guest i915 needs to access RMRR. Xiong cooked a 
patch to disable the RMRR use in i915 guest, however that patch didn't get 
landed into the i915 upstream repo due to some concerns from i915 maintainers. 
Xiong can give us more backgrounds.

So agreed, need to ask the i915 folk for this.

BR,
Tina



-Original Message-
From: Yuan, Hang
Sent: Thursday, May 9, 2019 4:07 PM
To: Lu Baolu ; Tian, Kevin ;
Zhenyu Wang ; Zhang, Tina
; Lu, Baolu ; Liu, Yi L

Subject: RE: IOMMU RMRR for Intel graphic device

Hi Baolu, as Kevin suggested, would you like to ask i915 people in their
mailing list intel-gfx@lists.freedesktop.org?

Regards,
Henry


-Original Message-
From: Lu Baolu [mailto:baolu...@linux.intel.com]
Sent: Thursday, May 9, 2019 2:42 PM
To: Tian, Kevin ; Zhenyu Wang
; Zhang, Tina ; Yuan,
Hang ; Lu, Baolu ; Liu, Yi L

Cc: baolu...@linux.intel.com
Subject: Re: IOMMU RMRR for Intel graphic device

Hi,

+Tina and Henry and cc more people

The background is that the Linux community is going to collect and report
the reserved memory ranges of an assigned device to the VFIO driver, and
any mapped address will be checked against the reserved ranges. If
there's any conflict, vfio will refuse the map request.

Unfortunately, when it comes to the Intel graphic device, the conflict happens.
That means an address range mapped through vfio conflicts with the rmrr
for the graphic device.

https://lkml.org/lkml/2018/6/5/760

The questions are: 1) will the rmrr for the graphic device still need to
be reserved for BIOS or firmware use when a device is going to be
assigned to user level? 2) if no, what's the check point after which
the rmrr is unnecessary anymore?

Best regards,
Lu Baolu

On 5/6/19 2:16 PM, Tian, Kevin wrote:

this should better be asked to i915 guys, since it's not
virtualization related. :-)

One caveat, iirc, i915 driver tries to reuse stolen memory (covered
by
RMRR) even after boot time. take it as if another type of memory
resource. If true I'm afraid this might be a gap to your proposal.

Since nothing confidential, possibly you can directly discuss in

community.



From: Lu Baolu [mailto:baolu...@linux.intel.com]
Sent: Thursday, May 2, 2019 2:45 PM

Ping...

Any comments? This has been postponed in the community for a long

time.

We need to respond to this as soon as possible.

Best regards,
Lu Baolu

On 4/29/19 1:19 PM, Lu Baolu wrote:

Hi Zhenyu,

As we discussed, BIOS always exports IOMMU reserved memory regions
(a.k.a. RMRR in the VT-d spec) for the Intel integrated graphic device.
This caused some problems when we pass such graphic devices through to
user level.


I am about to propose something to the community so that an RMRR
for graphic devices could be explicitly canceled as long as the driver
(i915 or vfio) knows that the RMRR will never be used by BIOS anymore.


The same story happens for USB host controller devices, where we know
that BIOS will stop using that memory region as soon as the driver
clears the SMI bits.

So the question is: can the graphic driver know when the RMRR for
graphics could be canceled?

Best regards,
Lu Baolu



Re: [Intel-gfx] IOMMU RMRR for Intel graphic device

2019-05-09 Thread Lu Baolu

Forwarding to the i915 mailing list and posting the question again so people
know what the key concerns are.

The background is that the Linux community is going to collect and report
the reserved memory ranges of an assigned device to the VFIO driver, and any
mapped address will be checked against the reserved ranges. If there's
any conflict, vfio will refuse the map request.

Unfortunately, when it comes to the Intel graphic device, the conflict happens.
That means an address range mapped through vfio conflicts with the rmrr for
the graphic device.

https://lkml.org/lkml/2018/6/5/760

The questions are: 1) will the rmrr for the graphic device still need to be
reserved for BIOS or firmware use when a device is going to be assigned
to user level? 2) if no, what's the check point after which the rmrr is
unnecessary anymore?

Best regards,
Lu Baolu

On 5/9/19 5:14 PM, Zhang, Tina wrote:

Hi Baolu,

+Xiong

I think the root cause is that guest i915 needs to access RMRR. Xiong cooked a 
patch to disable the RMRR use in i915 guest, however that patch didn't get 
landed into the i915 upstream repo due to some concerns from i915 maintainers. 
Xiong can give us more backgrounds.

So agreed, need to ask the i915 folk for this.

BR,
Tina



-Original Message-
From: Yuan, Hang
Sent: Thursday, May 9, 2019 4:07 PM
To: Lu Baolu ; Tian, Kevin ;
Zhenyu Wang ; Zhang, Tina
; Lu, Baolu ; Liu, Yi L

Subject: RE: IOMMU RMRR for Intel graphic device

Hi Baolu, as Kevin suggested, would you like to ask i915 people in their
mailing list intel-gfx@lists.freedesktop.org?

Regards,
Henry


-Original Message-
From: Lu Baolu [mailto:baolu...@linux.intel.com]
Sent: Thursday, May 9, 2019 2:42 PM
To: Tian, Kevin ; Zhenyu Wang
; Zhang, Tina ; Yuan,
Hang ; Lu, Baolu ; Liu, Yi L

Cc: baolu...@linux.intel.com
Subject: Re: IOMMU RMRR for Intel graphic device

Hi,

+Tina and Henry and cc more people

The background is that the Linux community is going to collect and report
the reserved memory ranges of an assigned device to the VFIO driver, and
any mapped address will be checked against the reserved ranges. If
there's any conflict, vfio will refuse the map request.

Unfortunately, when it comes to the Intel graphic device, the conflict happens.
That means an address range mapped through vfio conflicts with the rmrr
for the graphic device.

https://lkml.org/lkml/2018/6/5/760

The questions are: 1) will the rmrr for the graphic device still need to
be reserved for BIOS or firmware use when a device is going to be
assigned to user level? 2) if no, what's the check point after which
the rmrr is unnecessary anymore?

Best regards,
Lu Baolu

On 5/6/19 2:16 PM, Tian, Kevin wrote:

this should better be asked to i915 guys, since it's not
virtualization related. :-)

One caveat, iirc, i915 driver tries to reuse stolen memory (covered
by
RMRR) even after boot time. take it as if another type of memory
resource. If true I'm afraid this might be a gap to your proposal.

Since nothing confidential, possibly you can directly discuss in

community.



From: Lu Baolu [mailto:baolu...@linux.intel.com]
Sent: Thursday, May 2, 2019 2:45 PM

Ping...

Any comments? This has been postponed in the community for a long

time.

We need to respond to this as soon as possible.

Best regards,
Lu Baolu

On 4/29/19 1:19 PM, Lu Baolu wrote:

Hi Zhenyu,

As we discussed, BIOS always exports IOMMU reserved memory regions
(a.k.a. RMRR in the VT-d spec) for the Intel integrated graphic device.
This caused some problems when we pass such graphic devices through to
user level.


I am about to propose something to the community so that an RMRR
for graphic devices could be explicitly canceled as long as the driver
(i915 or vfio) knows that the RMRR will never be used by BIOS anymore.


The same story happens for USB host controller devices, where we know
that BIOS will stop using that memory region as soon as the driver
clears the SMI bits.

So the question is: can the graphic driver know when the RMRR for
graphics could be canceled?

Best regards,
Lu Baolu



Re: [Intel-gfx] [PATCH 1/1] Revert "iommu/vt-d: Clean up pasid quirk for pre-production devices"

2018-07-16 Thread Lu Baolu
Hi Joerg,

The graphic guys are looking forward to having this in 4.18.
Is it possible to take it in the following rcs?

Best regards,
Lu Baolu

On 07/08/2018 02:23 PM, Lu Baolu wrote:
> This reverts commit ab96746aaa344fb720a198245a837e266fad3b62.
>
> The commit ab96746aaa34 ("iommu/vt-d: Clean up pasid quirk for
> pre-production devices") triggers ECS mode on some platforms
> which have broken ECS support. As a result, the graphic device
> will be inoperable on boot.
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107017
>
> Cc: Ashok Raj 
> Signed-off-by: Lu Baolu 
> ---
>  drivers/iommu/intel-iommu.c | 32 ++--
>  include/linux/intel-iommu.h |  1 +
>  2 files changed, 31 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
> index b344a88..115ff26 100644
> --- a/drivers/iommu/intel-iommu.c
> +++ b/drivers/iommu/intel-iommu.c
> @@ -484,14 +484,37 @@ static int dmar_forcedac;
>  static int intel_iommu_strict;
>  static int intel_iommu_superpage = 1;
>  static int intel_iommu_ecs = 1;
> +static int intel_iommu_pasid28;
>  static int iommu_identity_mapping;
>  
>  #define IDENTMAP_ALL 1
>  #define IDENTMAP_GFX 2
>  #define IDENTMAP_AZALIA  4
>  
> -#define ecs_enabled(iommu)   (intel_iommu_ecs && ecap_ecs(iommu->ecap))
> -#define pasid_enabled(iommu) (ecs_enabled(iommu) && ecap_pasid(iommu->ecap))
> +/* Broadwell and Skylake have broken ECS support — normal so-called "second
> + * level" translation of DMA requests-without-PASID doesn't actually happen
> + * unless you also set the NESTE bit in an extended context-entry. Which of
> + * course means that SVM doesn't work because it's trying to do nested
> + * translation of the physical addresses it finds in the process page tables,
> + * through the IOVA->phys mapping found in the "second level" page tables.
> + *
> + * The VT-d specification was retroactively changed to change the definition
> + * of the capability bits and pretend that Broadwell/Skylake never 
> happened...
> + * but unfortunately the wrong bit was changed. It's ECS which is broken, but
> + * for some reason it was the PASID capability bit which was redefined (from
> + * bit 28 on BDW/SKL to bit 40 in future).
> + *
> + * So our test for ECS needs to eschew those implementations which set the 
> old
> * PASID capability bit 28, since those are the ones on which ECS is broken.
> + * Unless we are working around the 'pasid28' limitations, that is, by 
> putting
> + * the device into passthrough mode for normal DMA and thus masking the bug.
> + */
> +#define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap) && \
> + (intel_iommu_pasid28 || 
> !ecap_broken_pasid(iommu->ecap)))
> +/* PASID support is thus enabled if ECS is enabled and *either* of the old
> + * or new capability bits are set. */
> +#define pasid_enabled(iommu) (ecs_enabled(iommu) &&  \
> +   (ecap_pasid(iommu->ecap) || 
> ecap_broken_pasid(iommu->ecap)))
>  
>  int intel_iommu_gfx_mapped;
>  EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
> @@ -554,6 +577,11 @@ static int __init intel_iommu_setup(char *str)
>   printk(KERN_INFO
>   "Intel-IOMMU: disable extended context table 
> support\n");
>   intel_iommu_ecs = 0;
> + } else if (!strncmp(str, "pasid28", 7)) {
> + printk(KERN_INFO
> + "Intel-IOMMU: enable pre-production PASID 
> support\n");
> + intel_iommu_pasid28 = 1;
> + iommu_identity_mapping |= IDENTMAP_GFX;
>   } else if (!strncmp(str, "tboot_noforce", 13)) {
>   printk(KERN_INFO
>   "Intel-IOMMU: not forcing on after tboot. This 
> could expose security risk for tboot\n");
> diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
> index 1df9401..ef169d6 100644
> --- a/include/linux/intel-iommu.h
> +++ b/include/linux/intel-iommu.h
> @@ -121,6 +121,7 @@
>  #define ecap_srs(e)  ((e >> 31) & 0x1)
>  #define ecap_ers(e)  ((e >> 30) & 0x1)
>  #define ecap_prs(e)  ((e >> 29) & 0x1)
> +#define ecap_broken_pasid(e) ((e >> 28) & 0x1)
>  #define ecap_dis(e)  ((e >> 27) & 0x1)
>  #define ecap_nest(e) ((e >> 26) & 0x1)
>  #define ecap_mts(e)  ((e >> 25) & 0x1)
