On Wed, May 9, 2018 at 4:56 PM, Nicholas Piggin <npig...@gmail.com> wrote:
> When a single-threaded process has a non-local mm_cpumask and requires
> a full PID tlbie invalidation, use that as an opportunity to reset the
> cpumask back to the current CPU we're running on.
>
> No other thread can concurrently switch to this mm, because it must
> have had a reference on mm_users before it could use_mm. mm_users can
> be asynchronously incremented e.g., by mmget_not_zero, but those users
> must not be doing use_mm.
>
> Signed-off-by: Nicholas Piggin <npig...@gmail.com>
> ---
>  arch/powerpc/include/asm/mmu_context.h | 19 +++++++++
>  arch/powerpc/include/asm/tlb.h         |  8 ++++
>  arch/powerpc/mm/tlb-radix.c            | 57 +++++++++++++++++++-------
>  3 files changed, 70 insertions(+), 14 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
> index 1835ca1505d6..df12a994529f 100644
> --- a/arch/powerpc/include/asm/mmu_context.h
> +++ b/arch/powerpc/include/asm/mmu_context.h
> @@ -6,6 +6,7 @@
>  #include <linux/kernel.h>
>  #include <linux/mm.h>
>  #include <linux/sched.h>
> +#include <linux/sched/mm.h>
>  #include <linux/spinlock.h>
>  #include <asm/mmu.h>
>  #include <asm/cputable.h>
> @@ -201,6 +202,24 @@ static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
>  static inline void enter_lazy_tlb(struct mm_struct *mm,
>                                    struct task_struct *tsk)
>  {
> +#ifdef CONFIG_PPC_BOOK3S_64
> +        /*
> +         * Under radix, we do not want to keep lazy PIDs around because
> +         * even if the CPU does not access userspace, it can still bring
> +         * in translations through speculation and prefetching.
> +         *
> +         * Switching away here allows us to trim back the mm_cpumask in
> +         * cases where we know the process is not running on some CPUs
> +         * (see mm/tlb-radix.c).
> +         */
> +        if (radix_enabled() && mm != &init_mm) {
> +                mmgrab(&init_mm);

This is called when a kernel thread decides to unuse a mm; I agree that
switching to init_mm as the active_mm is a reasonable thing to do.

> +                tsk->active_mm = &init_mm;

Are we called with irqs disabled? Don't we need them disabled for the
switch_mm_irqs_off() below?

> +                switch_mm_irqs_off(mm, tsk->active_mm, tsk);
> +                mmdrop(mm);
> +        }
> +#endif
> +
>          /* 64-bit Book3E keeps track of current PGD in the PACA */
>  #ifdef CONFIG_PPC_BOOK3E_64
>          get_paca()->pgd = NULL;
> diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h
> index a7eabff27a0f..006fce98c403 100644
> --- a/arch/powerpc/include/asm/tlb.h
> +++ b/arch/powerpc/include/asm/tlb.h
> @@ -76,6 +76,14 @@ static inline int mm_is_thread_local(struct mm_struct *mm)
>                  return false;
>          return cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm));
>  }
> +static inline void mm_reset_thread_local(struct mm_struct *mm)

mm_reset_thread_local --> mm_reset_to_thread_local?

> +{
> +        WARN_ON(atomic_read(&mm->context.copros) > 0);

Can we put this under DEBUG_VM, i.e. use VM_WARN_ON?
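Something like this, perhaps (untested sketch; VM_WARN_ON() is a no-op
unless CONFIG_DEBUG_VM is set):

        VM_WARN_ON(atomic_read(&mm->context.copros) > 0);
        VM_WARN_ON(!(atomic_read(&mm->mm_users) == 1 && current->mm == mm));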
> +        WARN_ON(!(atomic_read(&mm->mm_users) == 1 && current->mm == mm));
> +        atomic_set(&mm->context.active_cpus, 1);
> +        cpumask_clear(mm_cpumask(mm));
> +        cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
> +}
>  #else /* CONFIG_PPC_BOOK3S_64 */
>  static inline int mm_is_thread_local(struct mm_struct *mm)
>  {
> diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
> index 5ac3206c51cc..d5593a78702a 100644
> --- a/arch/powerpc/mm/tlb-radix.c
> +++ b/arch/powerpc/mm/tlb-radix.c
> @@ -504,6 +504,15 @@ void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmadd
>  }
>  EXPORT_SYMBOL(radix__local_flush_tlb_page);
>
> +static bool mm_is_singlethreaded(struct mm_struct *mm)
> +{

How about mm_tlb_context_is_local? We should also skip init_mm from these
checks.
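For the init_mm part, I mean an early bail-out at the top of the helper,
something like (sketch only, untested):

        if (mm == &init_mm)
                return false;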
> +        if (atomic_read(&mm->context.copros) > 0)
> +                return false;
> +        if (atomic_read(&mm->mm_users) == 1 && current->mm == mm)
> +                return true;
> +        return false;
> +}
> +
>  static bool mm_needs_flush_escalation(struct mm_struct *mm)
>  {
>          /*
> @@ -511,7 +520,9 @@ static bool mm_needs_flush_escalation(struct mm_struct *mm)
>           * caching PTEs and not flushing them properly when
>           * RIC = 0 for a PID/LPID invalidate
>           */
> -        return atomic_read(&mm->context.copros) != 0;
> +        if (atomic_read(&mm->context.copros) > 0)
> +                return true;
> +        return false;
>  }
>
>  #ifdef CONFIG_SMP
> @@ -525,12 +536,17 @@ void radix__flush_tlb_mm(struct mm_struct *mm)
>
>          preempt_disable();
>          if (!mm_is_thread_local(mm)) {
> -                if (mm_needs_flush_escalation(mm))
> +                if (mm_is_singlethreaded(mm)) {
>                          _tlbie_pid(pid, RIC_FLUSH_ALL);
> -                else
> +                        mm_reset_thread_local(mm);
> +                } else if (mm_needs_flush_escalation(mm)) {
> +                        _tlbie_pid(pid, RIC_FLUSH_ALL);
> +                } else {
>                          _tlbie_pid(pid, RIC_FLUSH_TLB);
> -        } else
> +                }
> +        } else {
>                  _tlbiel_pid(pid, RIC_FLUSH_TLB);
> +        }
>          preempt_enable();
>  }
>  EXPORT_SYMBOL(radix__flush_tlb_mm);
> @@ -544,10 +560,13 @@ void radix__flush_all_mm(struct mm_struct *mm)
>                  return;
>
>          preempt_disable();
> -        if (!mm_is_thread_local(mm))
> +        if (!mm_is_thread_local(mm)) {
>                  _tlbie_pid(pid, RIC_FLUSH_ALL);
> -        else
> +                if (mm_is_singlethreaded(mm))
> +                        mm_reset_thread_local(mm);
> +        } else {
>                  _tlbiel_pid(pid, RIC_FLUSH_ALL);
> +        }
>          preempt_enable();
>  }
>  EXPORT_SYMBOL(radix__flush_all_mm);
> @@ -644,10 +663,14 @@ void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
>                  if (local) {
>                          _tlbiel_pid(pid, RIC_FLUSH_TLB);
>                  } else {
> -                        if (mm_needs_flush_escalation(mm))
> +                        if (mm_is_singlethreaded(mm)) {
> +                                _tlbie_pid(pid, RIC_FLUSH_ALL);
> +                                mm_reset_thread_local(mm);
> +                        } else if (mm_needs_flush_escalation(mm)) {
>                                  _tlbie_pid(pid, RIC_FLUSH_ALL);
> -                        else
> +                        } else {
>                                  _tlbie_pid(pid, RIC_FLUSH_TLB);
> +                        }
>                  }
>          } else {
>                  bool hflush = false;
> @@ -802,13 +825,19 @@ static inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,
>          }
>
>          if (full) {
> -                if (!local && mm_needs_flush_escalation(mm))
> -                        also_pwc = true;
> -
> -                if (local)
> +                if (local) {
>                          _tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
> -                else
> -                        _tlbie_pid(pid, also_pwc ? RIC_FLUSH_ALL: RIC_FLUSH_TLB);
> +                } else {
> +                        if (mm_is_singlethreaded(mm)) {
> +                                _tlbie_pid(pid, RIC_FLUSH_ALL);
> +                                mm_reset_thread_local(mm);
> +                        } else {
> +                                if (mm_needs_flush_escalation(mm))
> +                                        also_pwc = true;
> +
> +                                _tlbie_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
> +                        }
> +                }
>          } else {
>                  if (local)
>                          _tlbiel_va_range(start, end, pid, page_size, psize, also_pwc);

Looks good otherwise.

Reviewed-by: Balbir Singh <bsinghar...@gmail.com>

Balbir Singh.