On Thu, 2018-09-20 at 18:31 +0100, Robin Murphy wrote:
> On 03/09/18 07:01, Yong Wu wrote:
> > MediaTek extend the arm v7s descriptor to support the dram over 4GB.
> > 
> > In the mt2712 and mt8173, it's called "4GB mode", the physical address
> > is from 0x4000_0000 to 0x1_3fff_ffff, but from EMI point of view, it
> > is remapped to high address from 0x1_0000_0000 to 0x1_ffff_ffff, the
> > bit32 is always enabled. thus, in the M4U, we always enable the bit9
> > for all PTEs which means to enable bit32 of physical address.
> > 
> > but in mt8183, M4U support the dram from 0x4000_0000 to 0x3_ffff_ffff
> > which isn't remaped. We extend the PTEs: the bit9 represent bit32 of
> > PA and the bit4 represent bit33 of PA. Meanwhile the iova still is
> > 32bits.
> > 
> > In order to unify code, in the "4GB mode", we add the bit32 for the
> > physical address manually in our driver.
> > 
> > Correspondingly, Adding bit32 and bit33 for the PA in the iova_to_phys
> > has to been moved into v7s.
> > 
> > Signed-off-by: Yong Wu <[email protected]>
> > ---
> > In mt8183, the PA is from 0x4000_0000 to 0x3_ffff_ffff while the iova
> > still is 32bits. Acturally, our HW extend the v7s pgtable. currently
> > the lvl1 pgtable is 16KB, our HW double it. but 32bit iova is enough
> > for us currently, thus we don't change it.
> > ---
> >   drivers/iommu/io-pgtable-arm-v7s.c | 38 
> > ++++++++++++++++++++++++++++++--------
> >   drivers/iommu/io-pgtable.h         |  8 ++++----
> >   drivers/iommu/mtk_iommu.c          | 15 +++++++++------
> >   drivers/iommu/mtk_iommu.h          |  1 +
> >   4 files changed, 44 insertions(+), 18 deletions(-)
> > 
> > diff --git a/drivers/iommu/io-pgtable-arm-v7s.c 
> > b/drivers/iommu/io-pgtable-arm-v7s.c
> > index b5948ba..47538dd 100644
> > --- a/drivers/iommu/io-pgtable-arm-v7s.c
> > +++ b/drivers/iommu/io-pgtable-arm-v7s.c
> > @@ -124,7 +124,9 @@
> >   #define ARM_V7S_TEX_MASK          0x7
> >   #define ARM_V7S_ATTR_TEX(val)             (((val) & ARM_V7S_TEX_MASK) << 
> > ARM_V7S_TEX_SHIFT)
> >   
> > -#define ARM_V7S_ATTR_MTK_4GB               BIT(9) /* MTK extend it for 4GB 
> > mode */
> > +/* MTK extend the two bits below for over 4GB mode */
> > +#define ARM_V7S_ATTR_MTK_PA_BIT32  BIT(9)
> > +#define ARM_V7S_ATTR_MTK_PA_BIT33  BIT(4)
> >   
> >   /* *well, except for TEX on level 2 large pages, of course :( */
> >   #define ARM_V7S_CONT_PAGE_TEX_SHIFT       6
> > @@ -268,7 +270,8 @@ static void __arm_v7s_set_pte(arm_v7s_iopte *ptep, 
> > arm_v7s_iopte pte,
> >   }
> >   
> >   static arm_v7s_iopte arm_v7s_prot_to_pte(int prot, int lvl,
> > -                                    struct io_pgtable_cfg *cfg)
> > +                                    struct io_pgtable_cfg *cfg,
> > +                                    phys_addr_t paddr) /* Only for MTK */
> 
> I'd rather keep this function dedicated to just generating the 
> permissions and attributes. If necessary I'm quite happy to add 
> additional helpers for getting/setting the address, much like LPAE now 
> has for handling 52-bit IOVAs.

Adding the two additional helpers looks need touch many lines. thus, I
use a new patch for it.

Thanks very much for your review.

> 
> >   {
> >     bool ap = !(cfg->quirks & IO_PGTABLE_QUIRK_NO_PERMS);
> >     arm_v7s_iopte pte = ARM_V7S_ATTR_NG | ARM_V7S_ATTR_S;
> > @@ -295,8 +298,12 @@ static arm_v7s_iopte arm_v7s_prot_to_pte(int prot, int 
> > lvl,
> >     if (lvl == 1 && (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS))
> >             pte |= ARM_V7S_ATTR_NS_SECTION;
> >   
> > -   if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_4GB)
> > -           pte |= ARM_V7S_ATTR_MTK_4GB;
> > +   if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_4GB) {
> > +           if (paddr & BIT_ULL(32))
> > +                   pte |= ARM_V7S_ATTR_MTK_PA_BIT32;
> > +           if (paddr & BIT_ULL(33))
> > +                   pte |= ARM_V7S_ATTR_MTK_PA_BIT33;
> > +   }
> >   
> >     return pte;
> >   }
> > @@ -392,7 +399,7 @@ static int arm_v7s_init_pte(struct arm_v7s_io_pgtable 
> > *data,
> >                     return -EEXIST;
> >             }
> >   
> > -   pte = arm_v7s_prot_to_pte(prot, lvl, cfg);
> > +   pte = arm_v7s_prot_to_pte(prot, lvl, cfg, paddr);
> >     if (num_entries > 1)
> >             pte = arm_v7s_pte_to_cont(pte, lvl);
> >   
> > @@ -484,7 +491,11 @@ static int arm_v7s_map(struct io_pgtable_ops *ops, 
> > unsigned long iova,
> >     if (!(prot & (IOMMU_READ | IOMMU_WRITE)))
> >             return 0;
> >   
> > -   if (WARN_ON(upper_32_bits(iova) || upper_32_bits(paddr)))
> > +   if (WARN_ON(upper_32_bits(iova)))
> > +           return -ERANGE;
> > +
> > +   if (WARN_ON(upper_32_bits(paddr) &&
> > +               !(iop->cfg.quirks & IO_PGTABLE_QUIRK_ARM_MTK_4GB)))
> 
> TBH I'd just cram the quirk check into the right-hand-side of the || 
> rather than introduce a separate if() - there's already too many 
> parentheses to read the whole thing easily ;)

Fix it in v2.
Also, the comments below are fixed.
Thanks.

> 
> >             return -ERANGE;
> >   
> >     ret = __arm_v7s_map(data, iova, paddr, size, prot, 1, data->pgd);
> > @@ -563,7 +574,7 @@ static size_t arm_v7s_split_blk_unmap(struct 
> > arm_v7s_io_pgtable *data,
> >     num_entries = size >> ARM_V7S_LVL_SHIFT(2);
> >     unmap_idx = ARM_V7S_LVL_IDX(iova, 2);
> >   
> > -   pte = arm_v7s_prot_to_pte(arm_v7s_pte_to_prot(blk_pte, 1), 2, cfg);
> > +   pte = arm_v7s_prot_to_pte(arm_v7s_pte_to_prot(blk_pte, 1), 2, cfg, 0);
> >     if (num_entries > 1)
> >             pte = arm_v7s_pte_to_cont(pte, 2);
> >   
> > @@ -677,7 +688,9 @@ static phys_addr_t arm_v7s_iova_to_phys(struct 
> > io_pgtable_ops *ops,
> >                                     unsigned long iova)
> >   {
> >     struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops);
> > +   struct io_pgtable_cfg *cfg = &data->iop.cfg;
> >     arm_v7s_iopte *ptep = data->pgd, pte;
> > +   phys_addr_t paddr;
> >     int lvl = 0;
> >     u32 mask;
> >   
> > @@ -693,7 +706,16 @@ static phys_addr_t arm_v7s_iova_to_phys(struct 
> > io_pgtable_ops *ops,
> >     mask = ARM_V7S_LVL_MASK(lvl);
> >     if (arm_v7s_pte_is_cont(pte, lvl))
> >             mask *= ARM_V7S_CONT_PAGES;
> > -   return (pte & mask) | (iova & ~mask);
> > +   paddr = (pte & mask) | (iova & ~mask);
> > +
> > +   if (IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT) &&
> > +       cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_4GB) {
> > +           if (pte & ARM_V7S_ATTR_MTK_PA_BIT32)
> > +                   paddr |= BIT_ULL(32);
> > +           if (pte & ARM_V7S_ATTR_MTK_PA_BIT33)
> > +                   paddr |= BIT_ULL(33);
> > +   }
> > +   return paddr;
> >   }
> >   
> >   static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg 
> > *cfg,
> > diff --git a/drivers/iommu/io-pgtable.h b/drivers/iommu/io-pgtable.h
> > index 2df7909..0eeed94 100644
> > --- a/drivers/iommu/io-pgtable.h
> > +++ b/drivers/iommu/io-pgtable.h
> > @@ -62,10 +62,10 @@ struct io_pgtable_cfg {
> >      *      (unmapped) entries but the hardware might do so anyway, perform
> >      *      TLB maintenance when mapping as well as when unmapping.
> >      *
> > -    * IO_PGTABLE_QUIRK_ARM_MTK_4GB: (ARM v7s format) Set bit 9 in all
> > -    *      PTEs, for Mediatek IOMMUs which treat it as a 33rd address bit
> > -    *      when the SoC is in "4GB mode" and they can only access the high
> > -    *      remap of DRAM (0x1_00000000 to 0x1_ffffffff).
> > +    * IO_PGTABLE_QUIRK_ARM_MTK_4GB: (ARM v7s format) Set bit 4 and 9 in all
> > +    *      PTEs, for Mediatek IOMMUs which treat it as the 33rd and 32rd
> 
> The "all PTEs" part referred to the fact that io-pgtable was forcing bit 
> 9 on unconditionally. Since that logic is now moving into the MTK driver 
> itself, all the quirk means now from io-pgtable's point of view is that 
> PAs may be up to 34 bits where bits 32 and 33 of the address are encoded 
> in bits 9 and 4 of the PTE respectively...
> 
> > +    *      address bit when the SoC dram is over 4GB and they can access
> > +    *      the physical address from 0x4000_0000 to 0x3_ffff_ffff.
> >      *
> >      * IO_PGTABLE_QUIRK_NO_DMA: Guarantees that the tables will only ever
> >      *      be accessed by a fully cache-coherent IOMMU or CPU (e.g. for a
> > diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
> > index c0e2da5..86bf647 100644
> > --- a/drivers/iommu/mtk_iommu.c
> > +++ b/drivers/iommu/mtk_iommu.c
> > @@ -367,12 +367,17 @@ static int mtk_iommu_map(struct iommu_domain *domain, 
> > unsigned long iova,
> >                      phys_addr_t paddr, size_t size, int prot)
> >   {
> >     struct mtk_iommu_domain *dom = to_mtk_domain(domain);
> > +   struct mtk_iommu_data *data = mtk_iommu_get_m4u_data();
> >     unsigned long flags;
> >     int ret;
> >   
> > +   if (IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT) &&
> > +       data->plat_data->has_4gb_mode &&
> > +       data->enable_4GB)
> > +           paddr |= BIT_ULL(32);
> 
> ...so it might be worth moving the rationale part of the current quirk 
> comment (i.e. that the "4GB mode" IOMMUs physically cannot use the lower 
> remap of RAM) to here where it now applies.
> 
> Robin.
> 
> > +
> >     spin_lock_irqsave(&dom->pgtlock, flags);
> > -   ret = dom->iop->map(dom->iop, iova, paddr & DMA_BIT_MASK(32),
> > -                       size, prot);
> > +   ret = dom->iop->map(dom->iop, iova, paddr, size, prot);
> >     spin_unlock_irqrestore(&dom->pgtlock, flags);
> >   
> >     return ret;
> > @@ -401,7 +406,6 @@ static phys_addr_t mtk_iommu_iova_to_phys(struct 
> > iommu_domain *domain,
> >                                       dma_addr_t iova)
> >   {
> >     struct mtk_iommu_domain *dom = to_mtk_domain(domain);
> > -   struct mtk_iommu_data *data = mtk_iommu_get_m4u_data();
> >     unsigned long flags;
> >     phys_addr_t pa;
> >   
> > @@ -409,9 +413,6 @@ static phys_addr_t mtk_iommu_iova_to_phys(struct 
> > iommu_domain *domain,
> >     pa = dom->iop->iova_to_phys(dom->iop, iova);
> >     spin_unlock_irqrestore(&dom->pgtlock, flags);
> >   
> > -   if (data->enable_4GB)
> > -           pa |= BIT_ULL(32);
> > -
> >     return pa;
> >   }
> >   
> > @@ -735,10 +736,12 @@ static int __maybe_unused mtk_iommu_resume(struct 
> > device *dev)
> >   
> >   static const struct mtk_iommu_plat_data mt2712_data = {
> >     .m4u_plat = M4U_MT2712,
> > +   .has_4gb_mode = true,
> >   };
> >   
> >   static const struct mtk_iommu_plat_data mt8173_data = {
> >     .m4u_plat = M4U_MT8173,
> > +   .has_4gb_mode = true,
> >   };
> >   
> >   static const struct of_device_id mtk_iommu_of_ids[] = {
> > diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h
> > index 333a0ef..a243047 100644
> > --- a/drivers/iommu/mtk_iommu.h
> > +++ b/drivers/iommu/mtk_iommu.h
> > @@ -43,6 +43,7 @@ enum mtk_iommu_plat {
> >   
> >   struct mtk_iommu_plat_data {
> >     enum mtk_iommu_plat m4u_plat;
> > +   bool has_4gb_mode;
> >   };
> >   
> >   struct mtk_iommu_domain;
> > 


Reply via email to