On Tue, Sep 18, 2018 at 10:19:09AM +0200, Peter Zijlstra wrote:
> On Mon, Sep 17, 2018 at 04:29:08PM +0200, Thomas Gleixner wrote:
> > @@ -1288,23 +1287,23 @@ static int __change_page_attr(struct cpa
> >  	err = split_large_page(cpa, kpte, address);
> >  	if (!err) {
> >  		/*
> > +		 * Do a global flush tlb after splitting the large page
> > +		 * and before we do the actual change page attribute in the PTE.
> > +		 *
> > +		 * With out this, we violate the TLB application note, that says
> > +		 * "The TLBs may contain both ordinary and large-page
> >  		 *  translations for a 4-KByte range of linear addresses. This
> >  		 *  may occur if software modifies the paging structures so that
> >  		 *  the page size used for the address range changes. If the two
> >  		 *  translations differ with respect to page frame or attributes
> >  		 *  (e.g., permissions), processor behavior is undefined and may
> >  		 *  be implementation-specific."
> > +		 *
> > +		 * We do this global tlb flush inside the cpa_lock, so that we
> >  		 * don't allow any other cpu, with stale tlb entries change the
> >  		 * page attribute in parallel, that also falls into the
> >  		 * just split large page entry.
> > +		 */
> >  		flush_tlb_all();
> >  		goto repeat;
> >  	}
>
> this made me look at the tlb invalidation of that thing again; do we
> want something like the below?
>
Further cleanups are possible...

---
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -285,25 +285,20 @@ static void cpa_flush_all(unsigned long
 	on_each_cpu(__cpa_flush_all, (void *) cache, 1);
 }

-static void __cpa_flush_range(void *arg)
-{
-	/*
-	 * We could optimize that further and do individual per page
-	 * tlb invalidates for a low number of pages. Caveat: we must
-	 * flush the high aliases on 64bit as well.
-	 */
-	__flush_tlb_all();
-}
-
 static void cpa_flush_range(unsigned long start, int numpages, int cache)
 {
-	unsigned int i, level;
-	unsigned long addr;
+	unsigned long addr, end = start + PAGE_SIZE * numpages;
+	unsigned int level;

 	BUG_ON(irqs_disabled() && !early_boot_irqs_disabled);
 	WARN_ON(PAGE_ALIGN(start) != start);

-	on_each_cpu(__cpa_flush_range, NULL, 1);
+	if (!static_cpu_has(X86_FEATURE_CLFLUSH)) {
+		cpa_flush_all(cache);
+		return;
+	}
+
+	flush_tlb_kernel_range(start, end);

 	if (!cache)
 		return;
@@ -314,7 +309,7 @@ static void cpa_flush_range(unsigned lon
 	 * will cause all other CPUs to flush the same
 	 * cachelines:
 	 */
-	for (i = 0, addr = start; i < numpages; i++, addr += PAGE_SIZE) {
+	for (addr = start; addr < end; addr += PAGE_SIZE) {
 		pte_t *pte = lookup_address(addr, &level);

 		/*
@@ -325,30 +320,22 @@ static void cpa_flush_range(unsigned lon
 	}
 }

-static void cpa_flush_array(unsigned long *start, int numpages, int cache,
+static void cpa_flush_array(unsigned long baddr, unsigned long *start,
+			    int numpages, int cache,
 			    int in_flags, struct page **pages)
 {
 	unsigned int i, level;
-#ifdef CONFIG_PREEMPT
-	/*
-	 * Avoid wbinvd() because it causes latencies on all CPUs,
-	 * regardless of any CPU isolation that may be in effect.
-	 *
-	 * This should be extended for CAT enabled systems independent of
-	 * PREEMPT because wbinvd() does not respect the CAT partitions and
-	 * this is exposed to unpriviledged users through the graphics
-	 * subsystem.
-	 */
-	unsigned long do_wbinvd = 0;
-#else
-	unsigned long do_wbinvd = cache && numpages >= 1024; /* 4M threshold */
-#endif

 	BUG_ON(irqs_disabled() && !early_boot_irqs_disabled);

-	on_each_cpu(__cpa_flush_all, (void *) do_wbinvd, 1);
+	if (!static_cpu_has(X86_FEATURE_CLFLUSH)) {
+		cpa_flush_all(cache);
+		return;
+	}
+
+	flush_tlb_kernel_range(baddr, baddr + PAGE_SIZE * numpages);

-	if (!cache || do_wbinvd)
+	if (!cache)
 		return;

 	/*
@@ -1006,14 +993,24 @@ __split_large_page(struct cpa_data *cpa,
 	__set_pmd_pte(kpte, address, mk_pte(base, __pgprot(_KERNPG_TABLE)));

 	/*
-	 * Intel Atom errata AAH41 workaround.
+	 * Do a global flush tlb after splitting the large page
+	 * and before we do the actual change page attribute in the PTE.
 	 *
-	 * The real fix should be in hw or in a microcode update, but
-	 * we also probabilistically try to reduce the window of having
-	 * a large TLB mixed with 4K TLBs while instruction fetches are
-	 * going on.
+	 * Without this, we violate the TLB application note, that says
+	 * "The TLBs may contain both ordinary and large-page
+	 *  translations for a 4-KByte range of linear addresses. This
+	 *  may occur if software modifies the paging structures so that
+	 *  the page size used for the address range changes. If the two
+	 *  translations differ with respect to page frame or attributes
+	 *  (e.g., permissions), processor behavior is undefined and may
+	 *  be implementation-specific."
+	 *
+	 * We do this global tlb flush inside the cpa_lock, so that we
+	 * don't allow any other cpu, with stale tlb entries change the
+	 * page attribute in parallel, that also falls into the
+	 * just split large page entry.
 	 */
-	__flush_tlb_all();
+	flush_tlb_all();
 	spin_unlock(&pgd_lock);

 	return 0;
@@ -1538,28 +1535,8 @@ static int __change_page_attr(struct cpa
 	 * We have to split the large page:
 	 */
 	err = split_large_page(cpa, kpte, address);
-	if (!err) {
-		/*
-		 * Do a global flush tlb after splitting the large page
-		 * and before we do the actual change page attribute in the PTE.
-		 *
-		 * With out this, we violate the TLB application note, that says
-		 * "The TLBs may contain both ordinary and large-page
-		 *  translations for a 4-KByte range of linear addresses. This
-		 *  may occur if software modifies the paging structures so that
-		 *  the page size used for the address range changes. If the two
-		 *  translations differ with respect to page frame or attributes
-		 *  (e.g., permissions), processor behavior is undefined and may
-		 *  be implementation-specific."
-		 *
-		 * We do this global tlb flush inside the cpa_lock, so that we
-		 * don't allow any other cpu, with stale tlb entries change the
-		 * page attribute in parallel, that also falls into the
-		 * just split large page entry.
-		 */
-		flush_tlb_all();
+	if (!err)
 		goto repeat;
-	}

 	return err;
 }
@@ -1786,9 +1763,9 @@ static int change_page_attr_set_clr(unsi
 	 * error case we fall back to cpa_flush_all (which uses
 	 * WBINVD):
 	 */
-	if (!ret && boot_cpu_has(X86_FEATURE_CLFLUSH)) {
+	if (!ret) {
 		if (cpa.flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) {
-			cpa_flush_array(addr, numpages, cache,
+			cpa_flush_array(baddr, addr, numpages, cache,
 					cpa.flags, pages);
 		} else
 			cpa_flush_range(baddr, numpages, cache);
@@ -2108,10 +2085,7 @@ static int __set_memory_enc_dec(unsigned
 	/*
 	 * Before changing the encryption attribute, we need to flush caches.
 	 */
-	if (static_cpu_has(X86_FEATURE_CLFLUSH))
-		cpa_flush_range(start, numpages, 1);
-	else
-		cpa_flush_all(1);
+	cpa_flush_range(start, numpages, 1);

 	ret = __change_page_attr_set_clr(&cpa, 1);

@@ -2122,10 +2096,7 @@ static int __set_memory_enc_dec(unsigned
 	 * in case TLB flushing gets optimized in the cpa_flush_range()
 	 * path use the same logic as above.
 	 */
-	if (static_cpu_has(X86_FEATURE_CLFLUSH))
-		cpa_flush_range(start, numpages, 0);
-	else
-		cpa_flush_all(0);
+	cpa_flush_range(start, numpages, 0);

 	return ret;
 }
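
(Not part of the patch, purely for illustration: the stand-alone C sketch below shows what both cpa_flush_range() and cpa_flush_array() boil down to after this change: no CLFLUSH means fall back to cpa_flush_all(), otherwise a ranged TLB invalidation followed by an optional per-page cache line flush. The cpu_has_clflush flag and the *_stub helpers are made-up stand-ins for static_cpu_has(X86_FEATURE_CLFLUSH), cpa_flush_all(), flush_tlb_kernel_range() and clflush_cache_range(); the sketch also skips the lookup_address()/present-PTE check the real loop does before flushing a cache line.)

/*
 * Illustrative userspace sketch only -- NOT kernel code. Stub names mirror
 * the kernel helpers used in the patch above purely for readability.
 */
#include <stdbool.h>
#include <stdio.h>

#define PAGE_SIZE 4096UL

static bool cpu_has_clflush = true;	/* stand-in for static_cpu_has(X86_FEATURE_CLFLUSH) */

static void cpa_flush_all_stub(int cache)
{
	/* no CLFLUSH: flush everything, WBINVD if caches must be written back */
	printf("full TLB flush on all CPUs%s\n", cache ? " + WBINVD" : "");
}

static void flush_tlb_kernel_range_stub(unsigned long start, unsigned long end)
{
	printf("ranged TLB flush %#lx-%#lx\n", start, end);
}

static void clflush_page_stub(unsigned long addr)
{
	/* the real loop only flushes present 4K PTEs found via lookup_address() */
	printf("clflush 4K page at %#lx\n", addr);
}

/* What cpa_flush_range()/cpa_flush_array() boil down to after the patch. */
static void cpa_flush_sketch(unsigned long start, int numpages, int cache)
{
	unsigned long addr, end = start + PAGE_SIZE * numpages;

	if (!cpu_has_clflush) {
		cpa_flush_all_stub(cache);
		return;
	}

	flush_tlb_kernel_range_stub(start, end);

	if (!cache)
		return;

	for (addr = start; addr < end; addr += PAGE_SIZE)
		clflush_page_stub(addr);
}

int main(void)
{
	/* e.g. four pages whose caching attributes changed */
	cpa_flush_sketch(0x100000UL, 4, 1);
	return 0;
}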