On Wed, Jan 15, 2020 at 3:21 AM Auger Eric <eric.au...@redhat.com> wrote: > > Hi Rob, > > On 1/13/20 3:39 PM, Rob Herring wrote: > > Arm SMMUv3.2 adds support for TLB range invalidate operations. > > Support for range invalidate is determined by the RIL bit in the IDR3 > > register. > > > > The range invalidate is in units of the leaf page size and operates on > > 1-32 chunks of a power of 2 multiple pages. First we determine from the > > size what power of 2 multiple we can use and then adjust the granule to > > 32x that size. > > > > Cc: Eric Auger <eric.au...@redhat.com> > > Cc: Jean-Philippe Brucker <jean-phili...@linaro.org> > > Cc: Will Deacon <w...@kernel.org> > > Cc: Robin Murphy <robin.mur...@arm.com> > > Cc: Joerg Roedel <j...@8bytes.org> > > Signed-off-by: Rob Herring <r...@kernel.org> > > --- > > drivers/iommu/arm-smmu-v3.c | 53 +++++++++++++++++++++++++++++++++++++ > > 1 file changed, 53 insertions(+) > > > > diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c > > index e91b4a098215..8b6b3e2aa383 100644 > > --- a/drivers/iommu/arm-smmu-v3.c > > +++ b/drivers/iommu/arm-smmu-v3.c > > @@ -70,6 +70,9 @@ > > #define IDR1_SSIDSIZE GENMASK(10, 6) > > #define IDR1_SIDSIZE GENMASK(5, 0) > > > > +#define ARM_SMMU_IDR3 0xc > > +#define IDR3_RIL (1 << 10) > > + > > #define ARM_SMMU_IDR5 0x14 > > #define IDR5_STALL_MAX GENMASK(31, 16) > > #define IDR5_GRAN64K (1 << 6) > > @@ -327,9 +330,14 @@ > > #define CMDQ_CFGI_1_LEAF (1UL << 0) > > #define CMDQ_CFGI_1_RANGE GENMASK_ULL(4, 0) > > > > +#define CMDQ_TLBI_0_NUM GENMASK_ULL(16, 12) > > +#define CMDQ_TLBI_RANGE_NUM_MAX 32 > > +#define CMDQ_TLBI_0_SCALE GENMASK_ULL(24, 20) > > #define CMDQ_TLBI_0_VMID GENMASK_ULL(47, 32) > > #define CMDQ_TLBI_0_ASID GENMASK_ULL(63, 48) > > #define CMDQ_TLBI_1_LEAF (1UL << 0) > > +#define CMDQ_TLBI_1_TTL GENMASK_ULL(9, 8) > > +#define CMDQ_TLBI_1_TG GENMASK_ULL(11, 10) > > #define CMDQ_TLBI_1_VA_MASK GENMASK_ULL(63, 12) > > #define CMDQ_TLBI_1_IPA_MASK 
GENMASK_ULL(51, 12) > > > > @@ -455,9 +463,13 @@ struct arm_smmu_cmdq_ent { > > #define CMDQ_OP_TLBI_S2_IPA 0x2a > > #define CMDQ_OP_TLBI_NSNH_ALL 0x30 > > struct { > > + u8 num; > > + u8 scale; > > u16 asid; > > u16 vmid; > > bool leaf; > > + u8 ttl; > > + u8 tg; > > u64 addr; > > } tlbi; > > > > @@ -595,6 +607,7 @@ struct arm_smmu_device { > > #define ARM_SMMU_FEAT_HYP (1 << 12) > > #define ARM_SMMU_FEAT_STALL_FORCE (1 << 13) > > #define ARM_SMMU_FEAT_VAX (1 << 14) > > +#define ARM_SMMU_FEAT_RANGE_INV (1 << 15) > > u32 features; > > > > #define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0) > > @@ -856,13 +869,21 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct > > arm_smmu_cmdq_ent *ent) > > cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31); > > break; > > case CMDQ_OP_TLBI_NH_VA: > > + cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num); > > + cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale); > > cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid); > > cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf); > > + cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl); > > + cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg); > > cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK; > > break; > > case CMDQ_OP_TLBI_S2_IPA: > > + cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num); > > + cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale); > > cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid); > > cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf); > > + cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl); > > + cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg); > > cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK; > > break; > > case CMDQ_OP_TLBI_NH_ASID: > > @@ -2022,12 +2043,39 @@ static void arm_smmu_tlb_inv_range(unsigned long > > iova, size_t size, > > cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid; > > } > > > > + if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) { > > + unsigned long tg, scale; > > + > > + /* Get the leaf page size */ > > + tg = 
__ffs(smmu_domain->domain.pgsize_bitmap); > It is unclear to me why you can't set tg with the granule parameter.
The granule could be 2MB sections if THP is enabled, right? > > + > > + /* Determine the power of 2 multiple number of pages */ > > + scale = __ffs(size / (1UL << tg)); > > + cmd.tlbi.scale = scale; > > + > > + cmd.tlbi.num = CMDQ_TLBI_RANGE_NUM_MAX - 1; > Also, could you explain why you use CMDQ_TLBI_RANGE_NUM_MAX? How's this: /* The invalidation loop defaults to the maximum range */ And perhaps I'll move it next to setting granule. > > + > > + /* Convert page size of 12,14,16 (log2) to 1,2,3 */ > > + cmd.tlbi.tg = ((tg - ilog2(SZ_4K)) / 2) + 1; > > + > > + /* Determine what level the granule is at */ > > + cmd.tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3)); > > + > > + /* Adjust granule to the maximum range */ > > + granule = CMDQ_TLBI_RANGE_NUM_MAX * (1 << scale) * (1UL << > > tg); > The spec says: > Range = ((NUM + 1) * 2^SCALE) * Translation_Granule_Size (NUM+1) can be 1-32. I went with the logical max for CMDQ_TLBI_RANGE_NUM_MAX rather than the NUM field value max. Rob _______________________________________________ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu