Introduce a new 'flags' attribute per context and define its first bit to be a marker requiring all TLBIs for that context to be broadcast globally. Once that marker is set on a context, it cannot be removed.
Such a marker is useful for memory contexts used by devices behind the NPU and CAPP/PSL. The NPU and the PSL keep their own translation cache so they need to see all the TLBIs for those contexts.

Signed-off-by: Frederic Barrat <fbar...@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/mmu.h | 18 ++++++++++++++++++
 arch/powerpc/include/asm/tlb.h           | 23 +++++++++++++++++++++--
 arch/powerpc/mm/mmu_context_book3s64.c   |  1 +
 3 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index 77529a3e3811..cd83f8eb6a3f 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -78,8 +78,12 @@ struct spinlock;
 /* Maximum possible number of NPUs in a system. */
 #define NV_MAX_NPUS 8
 
+/* Bits definition for the context flags */
+#define MM_GLOBAL_TLBIE	0	/* TLBI must be global */
+
 typedef struct {
 	mm_context_id_t id;
+	unsigned long flags;
 	u16 user_psize;		/* page size index */
 
 	/* NPU NMMU context */
@@ -164,5 +168,19 @@ extern void radix_init_pseries(void);
 static inline void radix_init_pseries(void) { };
 #endif
 
+/*
+ * Mark the memory context as requiring global TLBIs, when used by
+ * GPUs or CAPI accelerators managing their own TLB or ERAT.
+ */
+static inline void mm_context_set_global_tlbi(mm_context_t *ctx)
+{
+	set_bit(MM_GLOBAL_TLBIE, &ctx->flags);
+}
+
+static inline bool mm_context_get_global_tlbi(mm_context_t *ctx)
+{
+	return test_bit(MM_GLOBAL_TLBIE, &ctx->flags);
+}
+
 #endif /* __ASSEMBLY__ */
 #endif /* _ASM_POWERPC_BOOK3S_64_MMU_H_ */
diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h
index 609557569f65..87d4ddcbf7f8 100644
--- a/arch/powerpc/include/asm/tlb.h
+++ b/arch/powerpc/include/asm/tlb.h
@@ -71,8 +71,27 @@ static inline int mm_is_core_local(struct mm_struct *mm)
 
 static inline int mm_is_thread_local(struct mm_struct *mm)
 {
-	return cpumask_equal(mm_cpumask(mm),
-			     cpumask_of(smp_processor_id()));
+	int rc;
+
+	rc = cpumask_equal(mm_cpumask(mm),
+			   cpumask_of(smp_processor_id()));
+#ifdef CONFIG_PPC_BOOK3S_64
+	if (rc) {
+		/*
+		 * Check if context requires global TLBI.
+		 *
+		 * We need to make sure the PTE update is happening
+		 * before reading the context global flag. Otherwise,
+		 * reading the flag may be re-ordered and happen
+		 * first, and we could end up in a situation where the
+		 * old PTE was seen by a device, but the TLBI is not
+		 * global.
+		 */
+		smp_mb();
+		rc = !mm_context_get_global_tlbi(&mm->context);
+	}
+#endif
+	return rc;
 }
 
 #else
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index a3edf813d455..c32a3f729d81 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -156,6 +156,7 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 		return index;
 
 	mm->context.id = index;
+	mm->context.flags = 0;
 #ifdef CONFIG_PPC_ICSWX
 	mm->context.cop_lockp = kmalloc(sizeof(spinlock_t), GFP_KERNEL);
 	if (!mm->context.cop_lockp) {
-- 
2.11.0