On 13/05/2019 08:46, Auger Eric wrote:
Hi Robin,On 5/8/19 7:20 PM, Robin Murphy wrote:On 08/04/2019 13:19, Eric Auger wrote:When a stage 1 related fault event is read from the event queue, let's propagate it to potential external fault listeners, ie. users who registered a fault handler. Signed-off-by: Eric Auger <eric.au...@redhat.com> --- v4 -> v5: - s/IOMMU_FAULT_PERM_INST/IOMMU_FAULT_PERM_EXEC --- drivers/iommu/arm-smmu-v3.c | 169 +++++++++++++++++++++++++++++++++--- 1 file changed, 158 insertions(+), 11 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 8044445bc32a..1fd320788dcb 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -167,6 +167,26 @@ #define ARM_SMMU_PRIQ_IRQ_CFG1 0xd8 #define ARM_SMMU_PRIQ_IRQ_CFG2 0xdc +/* Events */ +#define ARM_SMMU_EVT_F_UUT 0x01 +#define ARM_SMMU_EVT_C_BAD_STREAMID 0x02 +#define ARM_SMMU_EVT_F_STE_FETCH 0x03 +#define ARM_SMMU_EVT_C_BAD_STE 0x04 +#define ARM_SMMU_EVT_F_BAD_ATS_TREQ 0x05 +#define ARM_SMMU_EVT_F_STREAM_DISABLED 0x06 +#define ARM_SMMU_EVT_F_TRANSL_FORBIDDEN 0x07 +#define ARM_SMMU_EVT_C_BAD_SUBSTREAMID 0x08 +#define ARM_SMMU_EVT_F_CD_FETCH 0x09 +#define ARM_SMMU_EVT_C_BAD_CD 0x0a +#define ARM_SMMU_EVT_F_WALK_EABT 0x0b +#define ARM_SMMU_EVT_F_TRANSLATION 0x10 +#define ARM_SMMU_EVT_F_ADDR_SIZE 0x11 +#define ARM_SMMU_EVT_F_ACCESS 0x12 +#define ARM_SMMU_EVT_F_PERMISSION 0x13 +#define ARM_SMMU_EVT_F_TLB_CONFLICT 0x20 +#define ARM_SMMU_EVT_F_CFG_CONFLICT 0x21 +#define ARM_SMMU_EVT_E_PAGE_REQUEST 0x24 + /* Common MSI config fields */ #define MSI_CFG0_ADDR_MASK GENMASK_ULL(51, 2) #define MSI_CFG2_SH GENMASK(5, 4) @@ -332,6 +352,15 @@ #define EVTQ_MAX_SZ_SHIFT 7 #define EVTQ_0_ID GENMASK_ULL(7, 0) +#define EVTQ_0_SSV GENMASK_ULL(11, 11) +#define EVTQ_0_SUBSTREAMID GENMASK_ULL(31, 12) +#define EVTQ_0_STREAMID GENMASK_ULL(63, 32) +#define EVTQ_1_PNU GENMASK_ULL(33, 33) +#define EVTQ_1_IND GENMASK_ULL(34, 34) +#define EVTQ_1_RNW GENMASK_ULL(35, 35) +#define EVTQ_1_S2 GENMASK_ULL(39, 39) +#define EVTQ_1_CLASS GENMASK_ULL(40, 41) +#define EVTQ_3_FETCH_ADDR GENMASK_ULL(51, 3) /* PRI queue */ #define PRIQ_ENT_DWORDS 2 @@ -639,6 +668,64 @@ struct arm_smmu_domain { spinlock_t devices_lock; }; +/* fault propagation */ + +#define IOMMU_FAULT_F_FIELDS (IOMMU_FAULT_UNRECOV_PASID_VALID | \ + IOMMU_FAULT_UNRECOV_PERM_VALID | \ + IOMMU_FAULT_UNRECOV_ADDR_VALID) + +struct arm_smmu_fault_propagation_data { + enum iommu_fault_reason reason; + bool s1_check; + u32 fields; /* IOMMU_FAULT_UNRECOV_*_VALID bits */ +}; + +/* + * Describes how SMMU faults translate into generic IOMMU faults + * and if they need to be reported externally + */ +static const struct arm_smmu_fault_propagation_data fault_propagation[] = { +[ARM_SMMU_EVT_F_UUT] = { }, +[ARM_SMMU_EVT_C_BAD_STREAMID] = { }, +[ARM_SMMU_EVT_F_STE_FETCH] = { }, +[ARM_SMMU_EVT_C_BAD_STE] = { }, +[ARM_SMMU_EVT_F_BAD_ATS_TREQ] = { }, +[ARM_SMMU_EVT_F_STREAM_DISABLED] = { }, +[ARM_SMMU_EVT_F_TRANSL_FORBIDDEN] = { }, +[ARM_SMMU_EVT_C_BAD_SUBSTREAMID] = {IOMMU_FAULT_REASON_PASID_INVALID, + false, + IOMMU_FAULT_UNRECOV_PASID_VALID + }, +[ARM_SMMU_EVT_F_CD_FETCH] = {IOMMU_FAULT_REASON_PASID_FETCH, + false, + IOMMU_FAULT_UNRECOV_PASID_VALID |It doesn't make sense to presume validity here, or in any of the faults below...+ IOMMU_FAULT_UNRECOV_FETCH_ADDR_VALID + }, +[ARM_SMMU_EVT_C_BAD_CD] = {IOMMU_FAULT_REASON_BAD_PASID_ENTRY, + false, + IOMMU_FAULT_UNRECOV_PASID_VALID + }, +[ARM_SMMU_EVT_F_WALK_EABT] = {IOMMU_FAULT_REASON_WALK_EABT, true, + IOMMU_FAULT_F_FIELDS | + IOMMU_FAULT_UNRECOV_FETCH_ADDR_VALID + }, +[ARM_SMMU_EVT_F_TRANSLATION] = {IOMMU_FAULT_REASON_PTE_FETCH, true, + IOMMU_FAULT_F_FIELDS + }, +[ARM_SMMU_EVT_F_ADDR_SIZE] = {IOMMU_FAULT_REASON_OOR_ADDRESS, true, + IOMMU_FAULT_F_FIELDS + }, +[ARM_SMMU_EVT_F_ACCESS] = {IOMMU_FAULT_REASON_ACCESS, true, + IOMMU_FAULT_F_FIELDS + }, +[ARM_SMMU_EVT_F_PERMISSION] = {IOMMU_FAULT_REASON_PERMISSION, true, + IOMMU_FAULT_F_FIELDS + }, +[ARM_SMMU_EVT_F_TLB_CONFLICT] = { }, +[ARM_SMMU_EVT_F_CFG_CONFLICT] = { }, +[ARM_SMMU_EVT_E_PAGE_REQUEST] = { }, +}; + struct arm_smmu_option_prop { u32 opt; const char *prop; @@ -1258,7 +1345,6 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid) return 0; } -__maybe_unused static struct arm_smmu_master_data * arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid) { @@ -1284,24 +1370,85 @@ arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid) return master; } +/* Populates the record fields according to the input SMMU event */ +static bool arm_smmu_transcode_fault(u64 *evt, u8 type, + struct iommu_fault_unrecoverable *record) +{ + const struct arm_smmu_fault_propagation_data *data; + u32 fields; + + if (type >= ARRAY_SIZE(fault_propagation)) + return false; + + data = &fault_propagation[type]; + if (!data->reason) + return false; + + fields = data->fields; + + if (data->s1_check & FIELD_GET(EVTQ_1_S2, evt[1])) + return false; /* S2 related fault, don't propagate */ + + if (fields & IOMMU_FAULT_UNRECOV_PASID_VALID) { + if (FIELD_GET(EVTQ_0_SSV, evt[0])) + record->pasid = FIELD_GET(EVTQ_0_SUBSTREAMID, evt[0]); + else + fields &= ~IOMMU_FAULT_UNRECOV_PASID_VALID;...because this logic then breaks for C_BAD_SUBSTREAMID, which ends up coming out of here *without* reporting the offending PASID.Correct.+ } + if (fields & IOMMU_FAULT_UNRECOV_PERM_VALID) { + if (!FIELD_GET(EVTQ_1_RNW, evt[1])) + record->perm |= IOMMU_FAULT_PERM_WRITE; + if (FIELD_GET(EVTQ_1_PNU, evt[1])) + record->perm |= IOMMU_FAULT_PERM_PRIV; + if (FIELD_GET(EVTQ_1_IND, evt[1])) + record->perm |= IOMMU_FAULT_PERM_EXEC; + } + if (fields & IOMMU_FAULT_UNRECOV_ADDR_VALID) + record->addr = evt[2]; + + if (fields & IOMMU_FAULT_UNRECOV_FETCH_ADDR_VALID) + record->fetch_addr = FIELD_GET(EVTQ_3_FETCH_ADDR, evt[3]); + + record->flags = fields; + return true; +} + +static void arm_smmu_report_event(struct arm_smmu_device *smmu, u64 *evt) +{ + u32 sid = FIELD_GET(EVTQ_0_STREAMID, evt[0]); + u8 type = FIELD_GET(EVTQ_0_ID, evt[0]); + struct arm_smmu_master_data *master; + struct iommu_fault_event event = {}; + int i; + + master = arm_smmu_find_master(smmu, sid); + if (WARN_ON(!master)) + return;NAK. If I'm getting global faults like C_BAD_STE where a device almost certainly *isn't* configured (because hey, we would have initialised its STEs if we knew), then I sure as hell want to see the actual faults. Spamming a constant stream of stack traces *instead* of showing them is worse than useless.Sure, if !master I will output the original traces.+ + event.fault.type = IOMMU_FAULT_DMA_UNRECOV; + + if (arm_smmu_transcode_fault(evt, type, &event.fault.event)) { + iommu_report_device_fault(master->dev, &event); + return;And again, the vast majority of the time, there won't be a fault handler registered, so unconditionally suppressing the most common and useful stuff like translation and permission faults is very much not OK.Going to test whether we are in nested mode before entering that path.
I don't think this has to be exclusive to nesting - the generic reporting mechanism feels like it might ultimately be extensible to other things like Rob's case for generalised stalling. It's just that for robustness, even when a fault handler is present, we still want the driver to be able to report if it didn't actually handle a fault.
Thanks, Robin.