On 14/10/2019 07:38, Yong Wu wrote:
Use the iommu_gather mechanism to achieve the tlb range flush.
Gather the iova range in the "tlb_add_page", then flush the merged iova
range in iotlb_sync.

Note: If iotlb_sync comes from iommu_iotlb_gather_add_page, we have to
avoid retaking the lock since the spinlock has already been acquired.

I think this could probably be even simpler - once the actual register-poking is all confined to mtk_iommu_tlb_sync(), you should be able to get rid of the per-domain locking in map/unmap and just have a single per-IOMMU lock to serialise syncs. The io-pgtable code itself hasn't needed external locking for a while now.

Robin.

Suggested-by: Tomasz Figa <tf...@chromium.org>
Signed-off-by: Yong Wu <yong...@mediatek.com>
---
1) This is the special case backtrace:

  mtk_iommu_iotlb_sync+0x50/0xa0
  mtk_iommu_tlb_flush_page_nosync+0x5c/0xd0
  __arm_v7s_unmap+0x174/0x598
  arm_v7s_unmap+0x30/0x48
  mtk_iommu_unmap+0x50/0x78
  __iommu_unmap+0xa4/0xf8

2) The check "if (gather->start == ULONG_MAX) return;" is also
necessary. This happens when unmap only goes to _flush_walk and then
enters this tlb_sync.
---
  drivers/iommu/mtk_iommu.c | 29 +++++++++++++++++++++++++----
  drivers/iommu/mtk_iommu.h |  1 +
  2 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 5f594d6..8712afc 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -234,7 +234,12 @@ static void mtk_iommu_tlb_flush_page_nosync(struct 
iommu_iotlb_gather *gather,
                                            unsigned long iova, size_t granule,
                                            void *cookie)
  {
-       mtk_iommu_tlb_add_flush_nosync(iova, granule, granule, true, cookie);
+       struct mtk_iommu_data *data = cookie;
+       struct iommu_domain *domain = &data->m4u_dom->domain;
+
+       data->is_in_tlb_gather_add_page = true;
+       iommu_iotlb_gather_add_page(domain, gather, iova, granule);
+       data->is_in_tlb_gather_add_page = false;
  }
static const struct iommu_flush_ops mtk_iommu_flush_ops = {
@@ -453,12 +458,28 @@ static void mtk_iommu_flush_iotlb_all(struct iommu_domain 
*domain)
  static void mtk_iommu_iotlb_sync(struct iommu_domain *domain,
                                 struct iommu_iotlb_gather *gather)
  {
+       struct mtk_iommu_data *data = mtk_iommu_get_m4u_data();
        struct mtk_iommu_domain *dom = to_mtk_domain(domain);
+       bool is_in_gather = data->is_in_tlb_gather_add_page;
+       size_t length = gather->end - gather->start;
        unsigned long flags;
- spin_lock_irqsave(&dom->pgtlock, flags);
-       mtk_iommu_tlb_sync(mtk_iommu_get_m4u_data());
-       spin_unlock_irqrestore(&dom->pgtlock, flags);
+       if (gather->start == ULONG_MAX)
+               return;
+
+       /*
+        * Avoid acquire the lock when it's in gather_add_page since the lock
+        * has already been held.
+        */
+       if (!is_in_gather)
+               spin_lock_irqsave(&dom->pgtlock, flags);
+
+       mtk_iommu_tlb_add_flush_nosync(gather->start, length, gather->pgsize,
+                                      false, data);
+       mtk_iommu_tlb_sync(data);
+
+       if (!is_in_gather)
+               spin_unlock_irqrestore(&dom->pgtlock, flags);
  }
static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain,
diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h
index fc0f16e..d29af1d 100644
--- a/drivers/iommu/mtk_iommu.h
+++ b/drivers/iommu/mtk_iommu.h
@@ -58,6 +58,7 @@ struct mtk_iommu_data {
        struct iommu_group              *m4u_group;
        bool                            enable_4GB;
        bool                            tlb_flush_active;
+       bool                            is_in_tlb_gather_add_page;
struct iommu_device iommu;
        const struct mtk_iommu_plat_data *plat_data;

Reply via email to