Re: [PATCH 5/8] iommu/arm-smmu-v3: Add second level of context descriptor table

2019-09-19 Thread Jean-Philippe Brucker
On Mon, Jul 08, 2019 at 05:13:05PM +0200, Auger Eric wrote:
> >  #define STRTAB_STE_0_S1FMT GENMASK_ULL(5, 4)
> >  #define STRTAB_STE_0_S1FMT_LINEAR  0
> > +#define STRTAB_STE_0_S1FMT_4K_L2   1
> As you only use 64kB L2, I guess you can remove the 4K define?

I prefer defining all values, but I suppose I can get rid of it.

> > +   cfg->tables = devm_kzalloc(smmu->dev, sizeof(struct arm_smmu_cd_table) *
> > +  cfg->num_tables, GFP_KERNEL);
> > +   if (!cfg->tables)
> > +   return -ENOMEM;
> goto err_free_l1
> > +
> > +   ret = arm_smmu_alloc_cd_leaf_table(smmu, &cfg->tables[0], 
> > num_leaf_entries);
> don't you want to do that only in linear case. In 2-level mode, I
> understand arm_smmu_get_cd_ptr() will do the job.

OK, that might be better

> 
> > +   if (ret)
> > +   goto err_free_l1;
> > +
> > +   if (cfg->l1ptr)
> > +   arm_smmu_write_cd_l1_desc(cfg->l1ptr, &cfg->tables[0]);
> that stuff could be removed as well?

Yes

> By the way I can see that
> arm_smmu_get_cd_ptr() does a arm_smmu_sync_cd after. wouldn't it be
> needed here as well?

No context table is reachable from a STE at this point, so we don't have
to invalidate anything.

> > @@ -1815,7 +1935,7 @@ static void arm_smmu_domain_free(struct iommu_domain 
> > *domain)
> > if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
> > struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
> >  
> > -   if (cfg->table.ptr) {
> > +   if (cfg->tables) {
> > arm_smmu_free_cd_tables(smmu_domain);
> > arm_smmu_bitmap_free(smmu->asid_map, cfg->cd.asid);
> I don't get why the arm_smmu_bitmap_free is dependent on cfg->tables.

Simply because arm_smmu_bitmap_alloc() and arm_smmu_alloc_cd_tables()
are both performed in arm_smmu_domain_finalise_s1(). A domain returned
by arm_smmu_domain_alloc() is not fully initialized, it still needs to
be finalized by arm_smmu_attach_dev(). So here we check whether the
domain has been finalized or not. Zero being a valid ASID in this
driver, we can't check whether cfg->cd.asid is valid, so we check
cfg->tables instead.

And I forgot to clear cfg->tables after failure of domain_finalise in
this series, I'll need to fix it.

Thanks,
Jean


Re: [PATCH 5/8] iommu/arm-smmu-v3: Add second level of context descriptor table

2019-07-08 Thread Auger Eric
Hi Jean,

On 6/10/19 8:47 PM, Jean-Philippe Brucker wrote:
> The SMMU can support up to 20 bits of SSID. Add a second level of page
> tables to accommodate this. Devices that support more than 1024 SSIDs now
> have a table of 1024 L1 entries (8kB), pointing to tables of 1024 context
> descriptors (64kB), allocated on demand.
> 
> Signed-off-by: Jean-Philippe Brucker 
> ---
>  drivers/iommu/arm-smmu-v3.c | 136 +---
>  1 file changed, 128 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index d90eb604b65d..326b71793336 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -216,6 +216,8 @@
>  
>  #define STRTAB_STE_0_S1FMT   GENMASK_ULL(5, 4)
>  #define STRTAB_STE_0_S1FMT_LINEAR0
> +#define STRTAB_STE_0_S1FMT_4K_L2 1
As you only use 64kB L2, I guess you can remove the 4K define?
> +#define STRTAB_STE_0_S1FMT_64K_L22
>  #define STRTAB_STE_0_S1CTXPTR_MASK   GENMASK_ULL(51, 6)
>  #define STRTAB_STE_0_S1CDMAX GENMASK_ULL(63, 59)
>  
> @@ -255,6 +257,18 @@
>  
>  #define STRTAB_STE_3_S2TTB_MASK  GENMASK_ULL(51, 4)
>  
> +/*
> + * Linear: when less than 1024 SSIDs are supported
> + * 2lvl: at most 1024 L1 entrie,
entries
> + *  1024 lazy entries per table.
> + */
> +#define CTXDESC_SPLIT10
> +#define CTXDESC_NUM_L2_ENTRIES   (1 << CTXDESC_SPLIT)
> +
> +#define CTXDESC_L1_DESC_DWORD1
> +#define CTXDESC_L1_DESC_VALID1
> +#define CTXDESC_L1_DESC_L2PTR_MASK   GENMASK_ULL(51, 12)
> +
>  /* Context descriptor (stage-1 only) */
>  #define CTXDESC_CD_DWORDS8
>  #define CTXDESC_CD_0_TCR_T0SZGENMASK_ULL(5, 0)
> @@ -530,7 +544,10 @@ struct arm_smmu_ctx_desc {
>  struct arm_smmu_s1_cfg {
>   u8  s1fmt;
>   u8  s1cdmax;
> - struct arm_smmu_cd_tabletable;
> + struct arm_smmu_cd_table*tables;
> + size_t  num_tables;
> + __le64  *l1ptr;
> + dma_addr_t  l1ptr_dma;
>  
>   /* Context descriptor 0, when substreams are disabled or s1dss = 0b10 */
>   struct arm_smmu_ctx_desccd;
> @@ -1118,12 +1135,51 @@ static void arm_smmu_free_cd_leaf_table(struct 
> arm_smmu_device *smmu,
>  {
>   size_t size = num_entries * (CTXDESC_CD_DWORDS << 3);
>  
> + if (!table->ptr)
> + return;
>   dmam_free_coherent(smmu->dev, size, table->ptr, table->ptr_dma);
>  }
>  
> -static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_s1_cfg *cfg, u32 ssid)
> +static void arm_smmu_write_cd_l1_desc(__le64 *dst,
> +   struct arm_smmu_cd_table *table)
>  {
> - return cfg->table.ptr + ssid * CTXDESC_CD_DWORDS;
> + u64 val = (table->ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
> +   CTXDESC_L1_DESC_VALID;
> +
> + *dst = cpu_to_le64(val);
> +}
> +
> +static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
> +u32 ssid)> +{
> + unsigned int idx;
> + struct arm_smmu_cd_table *table;
> + struct arm_smmu_device *smmu = smmu_domain->smmu;
> + struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
> +
> + if (cfg->s1fmt == STRTAB_STE_0_S1FMT_LINEAR) {
> + table = &cfg->tables[0];
> + idx = ssid;
> + } else {
> + idx = ssid >> CTXDESC_SPLIT;
> + if (idx >= cfg->num_tables)
> + return NULL;
> +
> + table = &cfg->tables[idx];
> + if (!table->ptr) {
> + __le64 *l1ptr = cfg->l1ptr + idx * 
> CTXDESC_L1_DESC_DWORD;
> +
> + if (arm_smmu_alloc_cd_leaf_table(smmu, table,
> +  
> CTXDESC_NUM_L2_ENTRIES))
> + return NULL;
> +
> + arm_smmu_write_cd_l1_desc(l1ptr, table);
> + /* An invalid L1 entry is allowed to be cached */
> + arm_smmu_sync_cd(smmu_domain, ssid, false);
> + }
> + idx = ssid & (CTXDESC_NUM_L2_ENTRIES - 1);
> + }
> + return table->ptr + idx * CTXDESC_CD_DWORDS;
>  }
>  
>  static u64 arm_smmu_cpu_tcr_to_cd(u64 tcr)
> @@ -1149,7 +1205,7 @@ static int arm_smmu_write_ctx_desc(struct 
> arm_smmu_domain *smmu_domain,
>   u64 val;
>   bool cd_live;
>   struct arm_smmu_device *smmu = smmu_domain->smmu;
> - __le64 *cdptr = arm_smmu_get_cd_ptr(&smmu_domain->s1_cfg, ssid);
> + __le64 *cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
>  
>   /*
>* This function handles the following cases:
> @@ -1213,20 +1269,81 @@ static int arm_smmu_write_ctx_desc(struct 
> arm_smmu_domain *smmu_domain,
>  static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain,
>

Re: [PATCH 5/8] iommu/arm-smmu-v3: Add second level of context descriptor table

2019-06-11 Thread Jonathan Cameron
On Mon, 10 Jun 2019 19:47:11 +0100
Jean-Philippe Brucker  wrote:

> The SMMU can support up to 20 bits of SSID. Add a second level of page
> tables to accommodate this. Devices that support more than 1024 SSIDs now
> have a table of 1024 L1 entries (8kB), pointing to tables of 1024 context
> descriptors (64kB), allocated on demand.
> 
> Signed-off-by: Jean-Philippe Brucker 
One trivial typo.

Thanks,

Jonathan
> ---
>  drivers/iommu/arm-smmu-v3.c | 136 +---
>  1 file changed, 128 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index d90eb604b65d..326b71793336 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -216,6 +216,8 @@
>  
>  #define STRTAB_STE_0_S1FMT   GENMASK_ULL(5, 4)
>  #define STRTAB_STE_0_S1FMT_LINEAR0
> +#define STRTAB_STE_0_S1FMT_4K_L2 1
> +#define STRTAB_STE_0_S1FMT_64K_L22
>  #define STRTAB_STE_0_S1CTXPTR_MASK   GENMASK_ULL(51, 6)
>  #define STRTAB_STE_0_S1CDMAX GENMASK_ULL(63, 59)
>  
> @@ -255,6 +257,18 @@
>  
>  #define STRTAB_STE_3_S2TTB_MASK  GENMASK_ULL(51, 4)
>  
> +/*
> + * Linear: when less than 1024 SSIDs are supported
> + * 2lvl: at most 1024 L1 entrie,

entries?

> + *  1024 lazy entries per table.
> + */
> +#define CTXDESC_SPLIT10
> +#define CTXDESC_NUM_L2_ENTRIES   (1 << CTXDESC_SPLIT)
> +
> +#define CTXDESC_L1_DESC_DWORD1
> +#define CTXDESC_L1_DESC_VALID1
> +#define CTXDESC_L1_DESC_L2PTR_MASK   GENMASK_ULL(51, 12)
> +
>  /* Context descriptor (stage-1 only) */
>  #define CTXDESC_CD_DWORDS8
>  #define CTXDESC_CD_0_TCR_T0SZGENMASK_ULL(5, 0)
> @@ -530,7 +544,10 @@ struct arm_smmu_ctx_desc {
>  struct arm_smmu_s1_cfg {
>   u8  s1fmt;
>   u8  s1cdmax;
> - struct arm_smmu_cd_tabletable;
> + struct arm_smmu_cd_table*tables;
> + size_t  num_tables;
> + __le64  *l1ptr;
> + dma_addr_t  l1ptr_dma;
>  
>   /* Context descriptor 0, when substreams are disabled or s1dss = 0b10 */
>   struct arm_smmu_ctx_desccd;
> @@ -1118,12 +1135,51 @@ static void arm_smmu_free_cd_leaf_table(struct 
> arm_smmu_device *smmu,
>  {
>   size_t size = num_entries * (CTXDESC_CD_DWORDS << 3);
>  
> + if (!table->ptr)
> + return;
>   dmam_free_coherent(smmu->dev, size, table->ptr, table->ptr_dma);
>  }
>  
> -static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_s1_cfg *cfg, u32 ssid)
> +static void arm_smmu_write_cd_l1_desc(__le64 *dst,
> +   struct arm_smmu_cd_table *table)
>  {
> - return cfg->table.ptr + ssid * CTXDESC_CD_DWORDS;
> + u64 val = (table->ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
> +   CTXDESC_L1_DESC_VALID;
> +
> + *dst = cpu_to_le64(val);
> +}
> +
> +static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
> +u32 ssid)
> +{
> + unsigned int idx;
> + struct arm_smmu_cd_table *table;
> + struct arm_smmu_device *smmu = smmu_domain->smmu;
> + struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
> +
> + if (cfg->s1fmt == STRTAB_STE_0_S1FMT_LINEAR) {
> + table = &cfg->tables[0];
> + idx = ssid;
> + } else {
> + idx = ssid >> CTXDESC_SPLIT;
> + if (idx >= cfg->num_tables)
> + return NULL;
> +
> + table = &cfg->tables[idx];
> + if (!table->ptr) {
> + __le64 *l1ptr = cfg->l1ptr + idx * 
> CTXDESC_L1_DESC_DWORD;
> +
> + if (arm_smmu_alloc_cd_leaf_table(smmu, table,
> +  
> CTXDESC_NUM_L2_ENTRIES))
> + return NULL;
> +
> + arm_smmu_write_cd_l1_desc(l1ptr, table);
> + /* An invalid L1 entry is allowed to be cached */
> + arm_smmu_sync_cd(smmu_domain, ssid, false);
> + }
> + idx = ssid & (CTXDESC_NUM_L2_ENTRIES - 1);
> + }
> + return table->ptr + idx * CTXDESC_CD_DWORDS;
>  }
>  
>  static u64 arm_smmu_cpu_tcr_to_cd(u64 tcr)
> @@ -1149,7 +1205,7 @@ static int arm_smmu_write_ctx_desc(struct 
> arm_smmu_domain *smmu_domain,
>   u64 val;
>   bool cd_live;
>   struct arm_smmu_device *smmu = smmu_domain->smmu;
> - __le64 *cdptr = arm_smmu_get_cd_ptr(&smmu_domain->s1_cfg, ssid);
> + __le64 *cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
>  
>   /*
>* This function handles the following cases:
> @@ -1213,20 +1269,81 @@ static int arm_smmu_write_ctx_desc(struct 
> arm_smmu_domain *smmu_domain,
>  static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain,
>