PCIe devices can implement their own TLB, named Address Translation Cache
(ATC). Enable Address Translation Service (ATS) for devices that support
it and send them invalidation requests whenever we invalidate the IOTLBs.
ATC invalidation is allowed to take up to 90 seconds, according to the
PCIe spec, so it is possible to get a SMMU command queue timeout during
normal operations. However we expect implementations to complete
invalidation in reasonable time.
We only enable ATS for "trusted" devices, and currently rely on the
pci_dev->untrusted bit. For ATS we have to trust that:
(a) The device doesn't issue "translated" memory requests for addresses
that weren't returned by the SMMU in a Translation Completion. In
particular, if we give control of a device or device partition to a VM
or userspace, software cannot program the device to access arbitrary
"translated" addresses.
(b) The device follows permissions granted by the SMMU in a Translation
Completion. If the device requested read+write permission and only
got read, then it doesn't write.
(c) The device doesn't send Translated transactions for an address that
was invalidated by an ATC invalidation.
Note that the PCIe specification explicitly requires all of these, so we
can assume that implementations will cleanly shield ATCs from software.
All ATS translated requests still go through the SMMU, to walk the stream
table and check that the device is actually allowed to send translated
requests.
Signed-off-by: Jean-Philippe Brucker
---
drivers/iommu/arm-smmu-v3.c | 201 ++--
1 file changed, 195 insertions(+), 6 deletions(-)
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 3e7198ee9530..3bde137a3755 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -29,6 +29,7 @@
#include
#include
#include
+#include
#include
#include
@@ -86,6 +87,7 @@
#define IDR5_VAX_52_BIT1
#define ARM_SMMU_CR0 0x20
+#define CR0_ATSCHK (1 << 4)
#define CR0_CMDQEN (1 << 3)
#define CR0_EVTQEN (1 << 2)
#define CR0_PRIQEN (1 << 1)
@@ -294,6 +296,7 @@
#define CMDQ_ERR_CERROR_NONE_IDX 0
#define CMDQ_ERR_CERROR_ILL_IDX1
#define CMDQ_ERR_CERROR_ABT_IDX2
+#define CMDQ_ERR_CERROR_ATC_INV_IDX3
#define CMDQ_0_OP GENMASK_ULL(7, 0)
#define CMDQ_0_SSV (1UL << 11)
@@ -312,6 +315,12 @@
#define CMDQ_TLBI_1_VA_MASKGENMASK_ULL(63, 12)
#define CMDQ_TLBI_1_IPA_MASK GENMASK_ULL(51, 12)
+#define CMDQ_ATC_0_SSIDGENMASK_ULL(31, 12)
+#define CMDQ_ATC_0_SID GENMASK_ULL(63, 32)
+#define CMDQ_ATC_0_GLOBAL (1UL << 9)
+#define CMDQ_ATC_1_SIZEGENMASK_ULL(5, 0)
+#define CMDQ_ATC_1_ADDR_MASK GENMASK_ULL(63, 12)
+
#define CMDQ_PRI_0_SSIDGENMASK_ULL(31, 12)
#define CMDQ_PRI_0_SID GENMASK_ULL(63, 32)
#define CMDQ_PRI_1_GRPID GENMASK_ULL(8, 0)
@@ -433,6 +442,16 @@ struct arm_smmu_cmdq_ent {
u64 addr;
} tlbi;
+ #define CMDQ_OP_ATC_INV 0x40
+ #define ATC_INV_SIZE_ALL52
+ struct {
+ u32 sid;
+ u32 ssid;
+ u64 addr;
+ u8 size;
+ boolglobal;
+ } atc;
+
#define CMDQ_OP_PRI_RESP0x41
struct {
u32 sid;
@@ -580,10 +599,12 @@ struct arm_smmu_device {
/* SMMU private data for each master */
struct arm_smmu_master {
struct arm_smmu_device *smmu;
+ struct device *dev;
struct arm_smmu_domain *domain;
struct list_headdomain_head;
u32 *sids;
unsigned intnum_sids;
+ boolats_enabled :1;
};
/* SMMU private data for an IOMMU domain */
@@ -813,6 +834,14 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct
arm_smmu_cmdq_ent *ent)
case CMDQ_OP_TLBI_S12_VMALL:
cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
break;
+ case CMDQ_OP_ATC_INV:
+ cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
+ cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
+ cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
+ cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
+ cmd[1] |=