Author: kib Date: Sun Jan 29 19:14:48 2017 New Revision: 312954 URL: https://svnweb.freebsd.org/changeset/base/312954
Log: Do not leave stale 4K TLB entries on pde (superpage) removal or protection change. On superpage promotion, x86 pmaps do not invalidate existing 4K entries for the superpage range, because they are compatible with the promoted 2/4M entry. But the invalidation on superpage removal or protection change only did single INVLPG with the base address of the superpage. This reliably flushed superpage TLB entry, and 4K entry for the first page of the superpage, potentially leaving other 4K TLB entries lingering. Do the invalidation of the whole superpage range to correct the problem. Note that the precise invalidation is done by x86 code for kernel_pmap only, for user pmaps whole (per-AS) TLB is flushed. This made the bug well hidden, because promotions of the kernel mappings require specific load. Reported and tested by: Jonathan Looney <j...@netflix.com> (previous version) Reviewed by: alc Sponsored by: The FreeBSD Foundation MFC after: 1 week Modified: head/sys/amd64/amd64/pmap.c head/sys/i386/i386/pmap.c Modified: head/sys/amd64/amd64/pmap.c ============================================================================== --- head/sys/amd64/amd64/pmap.c Sun Jan 29 18:41:09 2017 (r312953) +++ head/sys/amd64/amd64/pmap.c Sun Jan 29 19:14:48 2017 (r312954) @@ -1040,7 +1040,12 @@ pmap_bootstrap(vm_paddr_t *firstaddr) virtual_avail = va; - /* Initialize the PAT MSR. */ + /* + * Initialize the PAT MSR. + * pmap_init_pat() clears and sets CR4_PGE, which, as a + * side-effect, invalidates stale PG_G TLB entries that might + * have been created in our pre-boot environment. + */ pmap_init_pat(); /* Initialize TLB Context Id. */ @@ -3428,6 +3433,7 @@ pmap_demote_pde_locked(pmap_t pmap, pd_e vm_paddr_t mptepa; vm_page_t mpte; struct spglist free; + vm_offset_t sva; int PG_PTE_CACHE; PG_G = pmap_global_bit(pmap); @@ -3464,9 +3470,9 @@ pmap_demote_pde_locked(pmap_t pmap, pd_e DMAP_MAX_ADDRESS ? VM_ALLOC_INTERRUPT : VM_ALLOC_NORMAL) | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { SLIST_INIT(&free); - pmap_remove_pde(pmap, pde, trunc_2mpage(va), &free, - lockp); - pmap_invalidate_page(pmap, trunc_2mpage(va)); + sva = trunc_2mpage(va); + pmap_remove_pde(pmap, pde, sva, &free, lockp); + pmap_invalidate_range(pmap, sva, sva + NBPDR - 1); pmap_free_zero_pages(&free); CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#lx" " in pmap %p", va, pmap); @@ -3608,11 +3614,23 @@ pmap_remove_pde(pmap_t pmap, pd_entry_t pmap->pm_stats.wired_count -= NBPDR / PAGE_SIZE; /* - * Machines that don't support invlpg, also don't support - * PG_G. - */ - if (oldpde & PG_G) - pmap_invalidate_page(kernel_pmap, sva); + * When workaround_erratum383 is false, a promotion to a 2M + * page mapping does not invalidate the 512 4K page mappings + * from the TLB. Consequently, at this point, the TLB may + * hold both 4K and 2M page mappings. Therefore, the entire + * range of addresses must be invalidated here. In contrast, + * when workaround_erratum383 is true, a promotion does + * invalidate the 512 4K page mappings, and so a single INVLPG + * suffices to invalidate the 2M page mapping. + */ + if ((oldpde & PG_G) != 0) { + if (workaround_erratum383) + pmap_invalidate_page(kernel_pmap, sva); + else + pmap_invalidate_range(kernel_pmap, sva, + sva + NBPDR - 1); + } + pmap_resident_count_dec(pmap, NBPDR / PAGE_SIZE); if (oldpde & PG_MANAGED) { CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, oldpde & PG_PS_FRAME); @@ -3994,9 +4012,14 @@ retry: if (newpde != oldpde) { if (!atomic_cmpset_long(pde, oldpde, newpde)) goto retry; - if (oldpde & PG_G) - pmap_invalidate_page(pmap, sva); - else + if (oldpde & PG_G) { + /* See pmap_remove_pde() for explanation. */ + if (workaround_erratum383) + pmap_invalidate_page(kernel_pmap, sva); + else + pmap_invalidate_range(kernel_pmap, sva, + sva + NBPDR - 1); + } else anychanged = TRUE; } return (anychanged); Modified: head/sys/i386/i386/pmap.c ============================================================================== --- head/sys/i386/i386/pmap.c Sun Jan 29 18:41:09 2017 (r312953) +++ head/sys/i386/i386/pmap.c Sun Jan 29 19:14:48 2017 (r312954) @@ -507,7 +507,14 @@ pmap_bootstrap(vm_paddr_t firstaddr) for (i = 1; i < NKPT; i++) PTD[i] = 0; - /* Initialize the PAT MSR if present. */ + /* + * Initialize the PAT MSR if present. + * pmap_init_pat() clears and sets CR4_PGE, which, as a + * side-effect, invalidates stale PG_G TLB entries that might + * have been created in our pre-boot environment. We assume + * that PAT support implies PGE and in reverse, PGE presence + * comes with PAT. Both features were added for Pentium Pro. + */ pmap_init_pat(); /* Turn on PG_G on kernel page(s) */ @@ -564,7 +571,10 @@ pmap_init_pat(void) pat_table[PAT_WRITE_PROTECTED] = 3; pat_table[PAT_UNCACHED] = 3; - /* Bail if this CPU doesn't implement PAT. */ + /* + * Bail if this CPU doesn't implement PAT. + * We assume that PAT support implies PGE. + */ if ((cpu_feature & CPUID_PAT) == 0) { for (i = 0; i < PAT_INDEX_SIZE; i++) pat_index[i] = pat_table[i]; @@ -2620,6 +2630,7 @@ pmap_demote_pde(pmap_t pmap, pd_entry_t vm_paddr_t mptepa; vm_page_t mpte; struct spglist free; + vm_offset_t sva; PMAP_LOCK_ASSERT(pmap, MA_OWNED); oldpde = *pde; @@ -2640,8 +2651,9 @@ pmap_demote_pde(pmap_t pmap, pd_entry_t va >> PDRSHIFT, VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL | VM_ALLOC_WIRED)) == NULL) { SLIST_INIT(&free); - pmap_remove_pde(pmap, pde, trunc_4mpage(va), &free); - pmap_invalidate_page(pmap, trunc_4mpage(va)); + sva = trunc_4mpage(va); + pmap_remove_pde(pmap, pde, sva, &free); + pmap_invalidate_range(pmap, sva, sva + NBPDR - 1); pmap_free_zero_pages(&free); CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#x" " in pmap %p", va, pmap); @@ -2811,9 +2823,24 @@ pmap_remove_pde(pmap_t pmap, pd_entry_t /* * Machines that don't support invlpg, also don't support * PG_G. - */ - if (oldpde & PG_G) - pmap_invalidate_page(kernel_pmap, sva); + * + * When workaround_erratum383 is false, a promotion to a 2M/4M + * page mapping does not invalidate the 512/1024 4K page mappings + * from the TLB. Consequently, at this point, the TLB may + * hold both 4K and 2M/4M page mappings. Therefore, the entire + * range of addresses must be invalidated here. In contrast, + * when workaround_erratum383 is true, a promotion does + * invalidate the 512/1024 4K page mappings, and so a single INVLPG + * suffices to invalidate the 2M/4M page mapping. + */ + if ((oldpde & PG_G) != 0) { + if (workaround_erratum383) + pmap_invalidate_page(kernel_pmap, sva); + else + pmap_invalidate_range(kernel_pmap, sva, + sva + NBPDR - 1); + } + pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; if (oldpde & PG_MANAGED) { pvh = pa_to_pvh(oldpde & PG_PS_FRAME); @@ -3122,9 +3149,14 @@ retry: if (newpde != oldpde) { if (!pde_cmpset(pde, oldpde, newpde)) goto retry; - if (oldpde & PG_G) - pmap_invalidate_page(pmap, sva); - else + if (oldpde & PG_G) { + /* See pmap_remove_pde() for explanation. */ + if (workaround_erratum383) + pmap_invalidate_page(kernel_pmap, sva); + else + pmap_invalidate_range(kernel_pmap, sva, + sva + NBPDR - 1); + } else anychanged = TRUE; } return (anychanged); _______________________________________________ svn-src-all@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"