Hi Peter,

On Fri, Aug 11, 2017 at 04:04:50PM +0200, Peter Zijlstra wrote:
> 
> Ok, so I have the below to still go on-top.
> 
> Ideally someone would clarify the situation around
> mm_tlb_flush_nested(), because ideally we'd remove the
> smp_mb__after_atomic() and go back to relying on PTL alone.
> 
> This also removes the pointless smp_mb__before_atomic()

I'm not an expert on barrier stuff, but IIUC, the full memory barrier
on the mm_tlb_flush_nested() side means we can remove the
smp_mb__after_atomic() on the inc_tlb_flush_pending() side?
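
Something like this on the inc side is what I mean -- just a sketch,
and I'm assuming inc_tlb_flush_pending() still carries an explicit
barrier after the atomic_inc() in your tree:

static inline void inc_tlb_flush_pending(struct mm_struct *mm)
{
	atomic_inc(&mm->tlb_flush_pending);
	/*
	 * No smp_mb__after_atomic() here: the counterpart
	 * atomic_dec_and_test() in mm_tlb_flush_nested() is a fully
	 * ordered RMW, so the flush side sees an up-to-date count
	 * without relying on the page table lock.
	 */
}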


diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 490af494c2da..5ad0e66df363 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -544,7 +544,12 @@ static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
  */
 static inline bool mm_tlb_flush_nested(struct mm_struct *mm)
 {
-       return atomic_read(&mm->tlb_flush_pending) > 1;
+       /*
+        * atomic_dec_and_test()'s full memory barrier guarantees
+        * other CPUs see an up-to-date tlb_flush_pending count
+        * without relying on the page table lock.
+        */
+       return !atomic_dec_and_test(&mm->tlb_flush_pending);
 }
 
 static inline void init_tlb_flush_pending(struct mm_struct *mm)
diff --git a/mm/memory.c b/mm/memory.c
index f571b0eb9816..e90b57bc65fb 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -407,6 +407,10 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
                        unsigned long start, unsigned long end)
 {
        arch_tlb_gather_mmu(tlb, mm, start, end);
+       /*
+        * The counterpart is mm_tlb_flush_nested() in tlb_finish_mmu(),
+        * which decreases the pending count.
+        */
        inc_tlb_flush_pending(tlb->mm);
 }
 
@@ -446,9 +450,7 @@ void tlb_finish_mmu(struct mmu_gather *tlb,
         *
         */
        bool force = mm_tlb_flush_nested(tlb->mm);
-
        arch_tlb_finish_mmu(tlb, start, end, force);
-       dec_tlb_flush_pending(tlb->mm);
 }
 
 /*

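In other words, the pairing after this change would be (sketch only,
assuming tlb_gather_mmu()/tlb_finish_mmu() are the only inc/dec users
left on this path):

	tlb_gather_mmu(tlb, mm, start, end)
		inc_tlb_flush_pending(mm);		/* count++ */
	... unmap pages, gather TLB invalidations ...
	tlb_finish_mmu(tlb, start, end)
		force = mm_tlb_flush_nested(mm);	/* count--, full
							 * barrier; true if
							 * another flush is
							 * still pending */
		arch_tlb_finish_mmu(tlb, start, end, force);
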