On Tue, Sep 18, 2018 at 10:19:09AM +0200, Peter Zijlstra wrote:
> On Mon, Sep 17, 2018 at 04:29:08PM +0200, Thomas Gleixner wrote:
> > @@ -1288,23 +1287,23 @@ static int __change_page_attr(struct cpa
> >     err = split_large_page(cpa, kpte, address);
> >     if (!err) {
> >             /*
> > +            * Do a global tlb flush after splitting the large page
> > +            * and before we actually change the page attribute in the PTE.
> > +            *
> > +            * Without this, we violate the TLB application note, which says
> > +            * "The TLBs may contain both ordinary and large-page
> >              *  translations for a 4-KByte range of linear addresses. This
> >              *  may occur if software modifies the paging structures so that
> >              *  the page size used for the address range changes. If the two
> >              *  translations differ with respect to page frame or attributes
> >              *  (e.g., permissions), processor behavior is undefined and may
> >              *  be implementation-specific."
> > +            *
> > +            * We do this global tlb flush inside the cpa_lock, so that we
> >              * don't allow any other cpu, with stale tlb entries, to change
> >              * the page attribute in parallel on an address that also falls
> >              * into the just split large page entry.
> > +            */
> >             flush_tlb_all();
> >             goto repeat;
> >     }
> 
> this made me look at the tlb invalidation of that thing again; do we
> want something like the below?
> 

Further cleanups are possible...
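
One direction (a rough sketch only, not part of the patch below; the
cpa_flush() name and the flag-based address dispatch are made up here for
illustration): with both paths now gated on X86_FEATURE_CLFLUSH and doing
a ranged TLB flush, cpa_flush_range() and cpa_flush_array() only differ
in how they compute the address to clflush, so they could plausibly
collapse into a single helper:

static void cpa_flush(unsigned long baddr, unsigned long *addrs,
                      struct page **pages, int numpages, int cache,
                      int flags)
{
        unsigned int i, level;

        BUG_ON(irqs_disabled() && !early_boot_irqs_disabled);

        if (!static_cpu_has(X86_FEATURE_CLFLUSH)) {
                cpa_flush_all(cache);
                return;
        }

        flush_tlb_kernel_range(baddr, baddr + PAGE_SIZE * numpages);

        if (!cache)
                return;

        for (i = 0; i < numpages; i++) {
                unsigned long addr;
                pte_t *pte;

                /* Pick the address the same way the two helpers do today. */
                if (flags & CPA_PAGES_ARRAY)
                        addr = (unsigned long)page_address(pages[i]);
                else if (flags & CPA_ARRAY)
                        addr = addrs[i];
                else
                        addr = baddr + i * PAGE_SIZE;

                /* Only clflush present 4K mappings; skip everything else. */
                pte = lookup_address(addr, &level);
                if (pte && (pte_val(*pte) & _PAGE_PRESENT))
                        clflush_cache_range((void *)addr, PAGE_SIZE);
        }
}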

---
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -285,25 +285,20 @@ static void cpa_flush_all(unsigned long
        on_each_cpu(__cpa_flush_all, (void *) cache, 1);
 }
 
-static void __cpa_flush_range(void *arg)
-{
-       /*
-        * We could optimize that further and do individual per page
-        * tlb invalidates for a low number of pages. Caveat: we must
-        * flush the high aliases on 64bit as well.
-        */
-       __flush_tlb_all();
-}
-
 static void cpa_flush_range(unsigned long start, int numpages, int cache)
 {
-       unsigned int i, level;
-       unsigned long addr;
+       unsigned long addr, end = start + PAGE_SIZE * numpages;
+       unsigned int level;
 
        BUG_ON(irqs_disabled() && !early_boot_irqs_disabled);
        WARN_ON(PAGE_ALIGN(start) != start);
 
-       on_each_cpu(__cpa_flush_range, NULL, 1);
+       if (!static_cpu_has(X86_FEATURE_CLFLUSH)) {
+               cpa_flush_all(cache);
+               return;
+       }
+
+       flush_tlb_kernel_range(start, end);
 
        if (!cache)
                return;
@@ -314,7 +309,7 @@ static void cpa_flush_range(unsigned lon
         * will cause all other CPUs to flush the same
         * cachelines:
         */
-       for (i = 0, addr = start; i < numpages; i++, addr += PAGE_SIZE) {
+       for (addr = start; addr < end; addr += PAGE_SIZE) {
                pte_t *pte = lookup_address(addr, &level);
 
                /*
@@ -325,30 +320,22 @@ static void cpa_flush_range(unsigned lon
        }
 }
 
-static void cpa_flush_array(unsigned long *start, int numpages, int cache,
+static void cpa_flush_array(unsigned long baddr, unsigned long *start,
+                           int numpages, int cache,
                            int in_flags, struct page **pages)
 {
        unsigned int i, level;
-#ifdef CONFIG_PREEMPT
-       /*
-        * Avoid wbinvd() because it causes latencies on all CPUs,
-        * regardless of any CPU isolation that may be in effect.
-        *
-        * This should be extended for CAT enabled systems independent of
-        * PREEMPT because wbinvd() does not respect the CAT partitions and
-        * this is exposed to unpriviledged users through the graphics
-        * subsystem.
-        */
-       unsigned long do_wbinvd = 0;
-#else
-       unsigned long do_wbinvd = cache && numpages >= 1024; /* 4M threshold */
-#endif
 
        BUG_ON(irqs_disabled() && !early_boot_irqs_disabled);
 
-       on_each_cpu(__cpa_flush_all, (void *) do_wbinvd, 1);
+       if (!static_cpu_has(X86_FEATURE_CLFLUSH)) {
+               cpa_flush_all(cache);
+               return;
+       }
+
+       flush_tlb_kernel_range(baddr, baddr + PAGE_SIZE * numpages);
 
-       if (!cache || do_wbinvd)
+       if (!cache)
                return;
 
        /*
@@ -1006,14 +993,24 @@ __split_large_page(struct cpa_data *cpa,
        __set_pmd_pte(kpte, address, mk_pte(base, __pgprot(_KERNPG_TABLE)));
 
        /*
-        * Intel Atom errata AAH41 workaround.
+        * Do a global tlb flush after splitting the large page
+        * and before we actually change the page attribute in the PTE.
         *
-        * The real fix should be in hw or in a microcode update, but
-        * we also probabilistically try to reduce the window of having
-        * a large TLB mixed with 4K TLBs while instruction fetches are
-        * going on.
+        * Without this, we violate the TLB application note, which says
+        * "The TLBs may contain both ordinary and large-page
+        *  translations for a 4-KByte range of linear addresses. This
+        *  may occur if software modifies the paging structures so that
+        *  the page size used for the address range changes. If the two
+        *  translations differ with respect to page frame or attributes
+        *  (e.g., permissions), processor behavior is undefined and may
+        *  be implementation-specific."
+        *
+        * We do this global tlb flush inside the cpa_lock, so that we
+        * don't allow any other cpu, with stale tlb entries, to change
+        * the page attribute in parallel on an address that also falls
+        * into the just split large page entry.
         */
-       __flush_tlb_all();
+       flush_tlb_all();
        spin_unlock(&pgd_lock);
 
        return 0;
@@ -1538,28 +1535,8 @@ static int __change_page_attr(struct cpa
         * We have to split the large page:
         */
        err = split_large_page(cpa, kpte, address);
-       if (!err) {
-               /*
-                * Do a global tlb flush after splitting the large page
-                * and before we actually change the page attribute in the PTE.
-                *
-                * Without this, we violate the TLB application note, which says
-                * "The TLBs may contain both ordinary and large-page
-                *  translations for a 4-KByte range of linear addresses. This
-                *  may occur if software modifies the paging structures so that
-                *  the page size used for the address range changes. If the two
-                *  translations differ with respect to page frame or attributes
-                *  (e.g., permissions), processor behavior is undefined and may
-                *  be implementation-specific."
-                *
-                * We do this global tlb flush inside the cpa_lock, so that we
-                * don't allow any other cpu, with stale tlb entries, to change
-                * the page attribute in parallel on an address that also falls
-                * into the just split large page entry.
-                */
-               flush_tlb_all();
+       if (!err)
                goto repeat;
-       }
 
        return err;
 }
@@ -1786,9 +1763,9 @@ static int change_page_attr_set_clr(unsi
         * error case we fall back to cpa_flush_all (which uses
         * WBINVD):
         */
-       if (!ret && boot_cpu_has(X86_FEATURE_CLFLUSH)) {
+       if (!ret) {
                if (cpa.flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) {
-                       cpa_flush_array(addr, numpages, cache,
+                       cpa_flush_array(baddr, addr, numpages, cache,
                                        cpa.flags, pages);
                } else
                        cpa_flush_range(baddr, numpages, cache);
@@ -2108,10 +2085,7 @@ static int __set_memory_enc_dec(unsigned
        /*
         * Before changing the encryption attribute, we need to flush caches.
         */
-       if (static_cpu_has(X86_FEATURE_CLFLUSH))
-               cpa_flush_range(start, numpages, 1);
-       else
-               cpa_flush_all(1);
+       cpa_flush_range(start, numpages, 1);
 
        ret = __change_page_attr_set_clr(&cpa, 1);
 
@@ -2122,10 +2096,7 @@ static int __set_memory_enc_dec(unsigned
         * in case TLB flushing gets optimized in the cpa_flush_range()
         * path use the same logic as above.
         */
-       if (static_cpu_has(X86_FEATURE_CLFLUSH))
-               cpa_flush_range(start, numpages, 0);
-       else
-               cpa_flush_all(0);
+       cpa_flush_range(start, numpages, 0);
 
        return ret;
 }
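
For reference (not part of the patch): flush_tlb_kernel_range() on x86
already falls back to a full flush for large ranges, so the conversion
above keeps roughly the old behaviour for big conversions and only does
per-page invalidation for small ones. A paraphrased sketch of the logic
in arch/x86/mm/tlb.c from this period:

void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
        /* Past the ceiling, a full flush beats many individual invlpg's. */
        if (end == TLB_FLUSH_ALL ||
            (end - start) > tlb_single_page_flush_ceiling << PAGE_SHIFT) {
                on_each_cpu(do_flush_tlb_all, NULL, 1);
        } else {
                struct flush_tlb_info info;

                info.start = start;
                info.end = end;
                on_each_cpu(do_kernel_range_flush, &info, 1);
        }
}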
