Wed, 19 May 2010 11:46:30 +0200, Andrew Doran wrote:

> On Wed, May 19, 2010 at 01:28:26AM +0200, Christoph Egger wrote:
>
> > Attached patch adds PAT support to x86.
> > I implement BUS_SPACE_MAP_PREFETCHABLE with it.
> >
> > Note, this patch adds MI pmap flags to uvm_pmap.h.
> > Please review whether they fit ports other than x86, too.
> >
> > PMAP_NOCACHE turned into an MI flag, so the following
> > ports need an update:
> >
> > hp700, hppa and sgimips.
> >
> > Please send me the update patches. I will accumulate
> > them so all will go into the tree in one go.
> >
> > When you boot with -x you should see something like this:
> >
> > $ dmesg | grep PAT
> > cpu0: PAT enabled
> > cpu1: PAT enabled
> > $
> >
> > Any comments?
>
> pat.h - cpuvar.h is better for small stuff
> pat.c - why not in pmap.c?
> pte.h - this needs a copyright
> specialreg.h - <TAB> after #define
> bus_space.c - u_int pmapflags = PMAP_NOCACHE - don't init. at site of defn.
> bus_space.c - if (flags & BUS_SPACE_MAP_CACHEABLE) - want ((f & FL) != 0)
> pat.c - pat_init - prefer macros at top of file
New patch attached. All of the above done, plus:

 * updated hppa (compile tested only)
 * updated mips/sgimips (compile tested only)
 * updated pmap(9)

All except this:

> cpu.c - cpu_attach - this needs to be in cpu_hatch() so executes on BPs

No. All processors must have the same PAT and MTRR settings; otherwise
coherency violations or loss of atomicity may happen.

> Thanks for working on this!

You're welcome.

Christoph
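
For reviewers who want to sanity-check the PGC_* encodings in the patch
below: on x86, a 4KB mapping selects its PAT entry through three PTE
bits, index = (PAT << 2) | (PCD << 1) | PWT. With the table that
pat_init() programs (entries 1 and 5 changed from the default WT to WC),
setting only PG_WT selects write-combining, which is why PGC_WC is
defined as PG_WT. The following userland sketch is illustrative only and
not part of the patch; pat_table and pat_index are made-up names:

#include <stdint.h>
#include <stdio.h>

/* x86 PTE cacheability bits (4KB pages): PWT=bit 3, PCD=bit 4, PAT=bit 7 */
#define PG_WT	0x008	/* write-through (PWT) */
#define PG_N	0x010	/* cache disable (PCD) */
#define PG_PAT	0x080	/* PAT select bit */

/* Memory types as programmed by pat_init(); entries 4-7 mirror 0-3. */
static const char *pat_table[8] = {
	"WB", "WC", "UC-", "UC",
	"WB", "WC", "UC-", "UC",
};

/* PAT index selected by a pte: (PAT << 2) | (PCD << 1) | PWT */
static unsigned
pat_index(uint64_t pte)
{
	return (!!(pte & PG_PAT) << 2) | (!!(pte & PG_N) << 1) |
	    !!(pte & PG_WT);
}

int
main(void)
{
	printf("PGC_WB      -> entry %u = %s\n",
	    pat_index(0), pat_table[pat_index(0)]);
	printf("PGC_WC      -> entry %u = %s\n",
	    pat_index(PG_WT), pat_table[pat_index(PG_WT)]);
	printf("PGC_UCMINUS -> entry %u = %s\n",
	    pat_index(PG_N), pat_table[pat_index(PG_N)]);
	printf("PGC_UC      -> entry %u = %s\n",
	    pat_index(PG_WT | PG_N), pat_table[pat_index(PG_WT | PG_N)]);
	return 0;
}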
# HG changeset patch
# User ceg...@netbsd.org
# Date 1277910073 -7200
Implement PAT support

diff -r b7b4ecb9c0cb -r d0d616f5cf8e share/man/man9/pmap.9
--- a/share/man/man9/pmap.9
+++ b/share/man/man9/pmap.9
@@ -486,8 +486,23 @@ resources, the
 .Nm
 module must panic.
 .It PMAP_NOCACHE
-The mapping being created is not cached.
+The mapping being created is
+.Em not
+cached.
 Write accesses have a write-through policy.
+No speculative memory accesses.
+.It PMAP_WRITE_COMBINE
+The mapping being created is
+.Em not
+cached.
+Writes are combined and done in one burst.
+Speculative read accesses may be allowed.
+.It PMAP_WRITE_BACK
+All accesses to the created mapping are cached.
+On reads, cachelines become shared or exclusive if allocated on cache miss.
+On writes, cachelines become modified on a cache miss.
+.It PMAP_NOCACHE_OVR
+Same as PMAP_NOCACHE but mapping is overrideable (e.g. on x86 by MTRRs).
 .El
 .Pp
 The access type provided in the
@@ -633,6 +648,19 @@ The mapping being created is
 .Em not
 cached.
 Write accesses have a write-through policy.
+No speculative memory accesses.
+.It PMAP_WRITE_COMBINE
+The mapping being created is
+.Em not
+cached.
+Writes are combined and done in one burst.
+Speculative read accesses may be allowed.
+.It PMAP_WRITE_BACK
+All accesses to the created mapping are cached.
+On reads, cachelines become shared or exclusive if allocated on cache miss.
+On writes, cachelines become modified on a cache miss.
+.It PMAP_NOCACHE_OVR
+Same as PMAP_NOCACHE but mapping is overrideable (e.g. on x86 by MTRRs).
 .El
 .Pp
 Mappings of this type are always
diff -r b7b4ecb9c0cb -r d0d616f5cf8e sys/arch/amd64/include/pte.h
--- a/sys/arch/amd64/include/pte.h
+++ b/sys/arch/amd64/include/pte.h
@@ -105,18 +105,23 @@ typedef uint64_t pt_entry_t;	/* PTE */
 #define	PG_RW		0x0000000000000002	/* read-write */
 #define	PG_u		0x0000000000000004	/* user accessible */
 #define	PG_PROT		0x0000000000000006
-#define	PG_N		0x0000000000000018	/* non-cacheable */
+#define	PG_WT		0x0000000000000008	/* write-through */
+#define	PG_N		0x0000000000000010	/* non-cacheable */
 #define	PG_U		0x0000000000000020	/* used */
 #define	PG_M		0x0000000000000040	/* modified */
-#define	PG_PS		0x0000000000000080	/* 2MB page size */
+#define	PG_PAT		0x0000000000000080	/* PAT (on pte) */
+#define	PG_PS		0x0000000000000080	/* 2MB page size (on pde) */
 #define	PG_G		0x0000000000000100	/* not flushed */
 #define	PG_AVAIL1	0x0000000000000200
 #define	PG_AVAIL2	0x0000000000000400
 #define	PG_AVAIL3	0x0000000000000800
+#define	PG_LGPAT	0x0000000000001000	/* PAT on large pages */
 #define	PG_FRAME	0x000ffffffffff000
 #define	PG_NX		0x8000000000000000
-#define	PG_LGFRAME	0x000fffffffe00000	/* large (2M) page frame mask */
+#define	PG_2MFRAME	0x000fffffffe00000	/* large (2M) page frame mask */
+#define	PG_1GFRAME	0x000fffffc0000000	/* large (1G) page frame mask */
+#define	PG_LGFRAME	PG_2MFRAME
 
 /*
  * short forms of protection codes
@@ -125,13 +130,6 @@ typedef uint64_t pt_entry_t;	/* PTE */
 #define	PG_KR		0x0000000000000000	/* kernel read-only */
 #define	PG_KW		0x0000000000000002	/* kernel read-write */
 
-/*
- * page protection exception bits
- */
-
-#define	PGEX_P		0x01	/* protection violation (vs. no mapping) */
-#define	PGEX_W		0x02	/* exception during a write cycle */
-#define	PGEX_U		0x04	/* exception while in user mode (upl) */
-#define	PGEX_X		0x10	/* exception during instruction fetch */
+#include <x86/pte.h>
 
 #endif /* _AMD64_PTE_H_ */
diff -r b7b4ecb9c0cb -r d0d616f5cf8e sys/arch/hppa/include/pmap.h
--- a/sys/arch/hppa/include/pmap.h
+++ b/sys/arch/hppa/include/pmap.h
@@ -192,11 +192,6 @@ pmap_protect(struct pmap *pmap, vaddr_t
 	((((va) & 0xc0000000) != 0xc0000000) ? \
 	(pmap)->pm_space : HPPA_SID_KERNEL)
 
-/*
- * MD flags that we use for pmap_kenter_pa:
- */
-#define	PMAP_NOCACHE	0x01000000	/* set the non-cacheable bit */
-
 #endif /* _KERNEL */
 
 #endif /* _HPPA_PMAP_H_ */
diff -r b7b4ecb9c0cb -r d0d616f5cf8e sys/arch/i386/include/pte.h
--- a/sys/arch/i386/include/pte.h
+++ b/sys/arch/i386/include/pte.h
@@ -218,7 +218,9 @@ typedef uint32_t pt_entry_t;	/* PTE */
 /* macros to get real L2 and L3 index, from our "extended" L2 index */
 #define l2tol3(idx)	((idx) >> (L3_SHIFT - L2_SHIFT))
 #define l2tol2(idx)	((idx) & (L2_REALMASK >> L2_SHIFT))
+
 #else /* PAE */
+
 #define	L1_SHIFT	12
 #define	L2_SHIFT	22
 #define	NBPD_L1		(1UL << L1_SHIFT)	/* # bytes mapped by L1 ent (4K) */
@@ -245,14 +247,17 @@ typedef uint32_t pt_entry_t;	/* PTE */
 #define	PG_RW		0x00000002	/* read-write page */
 #define	PG_u		0x00000004	/* user accessible page */
 #define	PG_PROT		0x00000806	/* all protection bits */
-#define	PG_N		0x00000018	/* non-cacheable */
+#define	PG_WT		0x00000008	/* write through */
+#define	PG_N		0x00000010	/* non-cacheable */
 #define	PG_U		0x00000020	/* has been used */
 #define	PG_M		0x00000040	/* has been modified */
+#define	PG_PAT		0x00000080	/* PAT (on pte) */
 #define	PG_PS		0x00000080	/* 4MB page size */
 #define	PG_G		0x00000100	/* global, don't TLB flush */
 #define	PG_AVAIL1	0x00000200	/* ignored by hardware */
 #define	PG_AVAIL2	0x00000400	/* ignored by hardware */
 #define	PG_AVAIL3	0x00000800	/* ignored by hardware */
+#define	PG_LGPAT	0x00001000	/* PAT on large pages */
 
 /*
  * various short-hand protection codes
@@ -267,13 +272,6 @@ typedef uint32_t pt_entry_t;	/* PTE */
 #define	PG_NX		0	/* dummy */
 #endif
 
-/*
- * page protection exception bits
- */
-
-#define	PGEX_P		0x01	/* protection violation (vs. no mapping) */
-#define	PGEX_W		0x02	/* exception during a write cycle */
-#define	PGEX_U		0x04	/* exception while in user mode (upl) */
-#define	PGEX_X		0x10	/* exception during instruction fetch */
+#include <x86/pte.h>
 
 #endif /* _I386_PTE_H_ */
diff -r b7b4ecb9c0cb -r d0d616f5cf8e sys/arch/mips/include/pmap.h
--- a/sys/arch/mips/include/pmap.h
+++ b/sys/arch/mips/include/pmap.h
@@ -202,7 +202,7 @@ paddr_t mips_pmap_unmap_poolpage(vaddr_t
 #define	PMAP_CCA_FOR_PA(pa)	CCA_UNCACHED	/* uncached */
 
 #if defined(_MIPS_PADDR_T_64BIT) || defined(_LP64)
-#define	PMAP_NOCACHE	0x4000000000000000ULL
+#define	PGC_NOCACHE	0x4000000000000000ULL
 #endif
 
 #endif	/* _KERNEL */
diff -r b7b4ecb9c0cb -r d0d616f5cf8e sys/arch/mips/mips/pmap.c
--- a/sys/arch/mips/mips/pmap.c
+++ b/sys/arch/mips/mips/pmap.c
@@ -1177,9 +1177,12 @@ pmap_enter(pmap_t pmap, vaddr_t va, padd
 #endif
 
 #if defined(_MIPS_PADDR_T_64BIT) || defined(_LP64)
-	if (pa & PMAP_NOCACHE) {
+	if (flags & PMAP_NOCACHE) {
 		cached = 0;
-		pa &= ~PMAP_NOCACHE;
+		pa &= ~PGC_NOCACHE;
+	} else {
+		cached = 1;
+		pa &= ~PGC_NOCACHE;	/* strip stray uncached hint */
 	}
 #endif
diff -r b7b4ecb9c0cb -r d0d616f5cf8e sys/arch/sgimips/sgimips/bus.c
--- a/sys/arch/sgimips/sgimips/bus.c
+++ b/sys/arch/sgimips/sgimips/bus.c
@@ -1116,6 +1116,7 @@ _bus_dmamem_map(bus_dma_tag_t t, bus_dma
 	int curseg;
 	const uvm_flag_t kmflags =
 	    (flags & BUS_DMA_NOWAIT) != 0 ? UVM_KMF_NOWAIT : 0;
+	u_int pmapflags;
 
 	/*
 	 * If we're only mapping 1 segment, use KSEG0 or KSEG1, to avoid
@@ -1139,21 +1140,19 @@ _bus_dmamem_map(bus_dma_tag_t t, bus_dma
 
 	*kvap = (void *)va;
 
+	pmapflags = VM_PROT_READ | VM_PROT_WRITE | PMAP_WIRED;
+	if (flags & BUS_DMA_COHERENT)
+		pmapflags |= PMAP_NOCACHE;
+
 	for (curseg = 0; curseg < nsegs; curseg++) {
 		for (addr = segs[curseg].ds_addr;
 		    addr < (segs[curseg].ds_addr + segs[curseg].ds_len);
 		    addr += PAGE_SIZE, va += PAGE_SIZE, size -= PAGE_SIZE) {
 			if (size == 0)
 				panic("_bus_dmamem_map: size botch");
-			pmap_enter(pmap_kernel(), va,
-#if defined(_MIPS_PADDR_T_64BIT) || defined(_LP64)
-			    (flags & BUS_DMA_COHERENT) ?
-			    addr | PMAP_NOCACHE : addr,
-#else
-			    addr,
-#endif
+			pmap_enter(pmap_kernel(), va, addr,
 			    VM_PROT_READ | VM_PROT_WRITE,
-			    VM_PROT_READ | VM_PROT_WRITE | PMAP_WIRED);
+			    pmapflags);
 		}
 	}
 	pmap_update(pmap_kernel());
diff -r b7b4ecb9c0cb -r d0d616f5cf8e sys/arch/x86/include/cpuvar.h
--- a/sys/arch/x86/include/cpuvar.h
+++ b/sys/arch/x86/include/cpuvar.h
@@ -140,12 +140,13 @@ int p4_get_bus_clock(struct cpu_info *);
 #endif
 
 void cpu_get_tsc_freq(struct cpu_info *);
+void pat_init(struct cpu_info *);
 
 extern int cpu_vendor;
 extern bool x86_mp_online;
 extern uint32_t cpu_feature[5];
 
-#endif
+#endif /* _KERNEL */
 
 #endif /* !_X86_CPUVAR_H_ */
diff -r b7b4ecb9c0cb -r d0d616f5cf8e sys/arch/x86/include/pmap.h
--- a/sys/arch/x86/include/pmap.h
+++ b/sys/arch/x86/include/pmap.h
@@ -178,7 +178,6 @@ struct pmap {
 /*
  * MD flags that we use for pmap_enter and pmap_kenter_pa:
  */
-#define	PMAP_NOCACHE	0x01000000	/* set the non-cacheable bit */
 
 /*
  * global kernel variables
diff -r b7b4ecb9c0cb -r d0d616f5cf8e sys/arch/x86/include/pte.h
--- /dev/null
+++ b/sys/arch/x86/include/pte.h
@@ -0,0 +1,50 @@
+/*	$NetBSD: $	*/
+
+/*
+ * Copyright (c) 2010 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Christoph Egger.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _X86_PTE_H
+#define _X86_PTE_H
+
+/* Cacheability bits when we are using PAT */
+#define PGC_WB		0		/* The default */
+#define PGC_WC		PG_WT		/* WT and CD is WC */
+#define PGC_UCMINUS	PG_N		/* UC but mtrr can override */
+#define PGC_UC		(PG_WT | PG_N)	/* hard UC */
+
+/*
+ * page protection exception bits
+ */
+
+#define PGEX_P		0x01	/* protection violation (vs. no mapping) */
+#define PGEX_W		0x02	/* exception during a write cycle */
+#define PGEX_U		0x04	/* exception while in user mode (upl) */
+#define PGEX_X		0x10	/* exception during instruction fetch */
+
+#endif /* _X86_PTE_H */
diff -r b7b4ecb9c0cb -r d0d616f5cf8e sys/arch/x86/include/specialreg.h
--- a/sys/arch/x86/include/specialreg.h
+++ b/sys/arch/x86/include/specialreg.h
@@ -343,6 +343,7 @@
 #define	MSR_MTRRfix4K_E8000	0x26d
 #define	MSR_MTRRfix4K_F0000	0x26e
 #define	MSR_MTRRfix4K_F8000	0x26f
+#define	MSR_CR_PAT		0x277
 #define	MSR_MTRRdefType		0x2ff
 #define	MSR_MC0_CTL		0x400
 #define	MSR_MC0_STATUS		0x401
diff -r b7b4ecb9c0cb -r d0d616f5cf8e sys/arch/x86/x86/bus_space.c
--- a/sys/arch/x86/x86/bus_space.c
+++ b/sys/arch/x86/x86/bus_space.c
@@ -199,8 +199,7 @@ bus_space_map(bus_space_tag_t t, bus_add
 	 * For memory space, map the bus physical address to
 	 * a kernel virtual address.
	 */
-	error = x86_mem_add_mapping(bpa, size,
-	    (flags & BUS_SPACE_MAP_CACHEABLE) != 0, bshp);
+	error = x86_mem_add_mapping(bpa, size, flags, bshp);
 	if (error) {
 		if (extent_free(ex, bpa, size, EX_NOWAIT |
 		    (ioport_malloc_safe ? EX_MALLOCOK : 0))) {
@@ -232,8 +231,7 @@ _x86_memio_map(bus_space_tag_t t, bus_ad
 	 * For memory space, map the bus physical address to
 	 * a kernel virtual address.
	 */
-	return (x86_mem_add_mapping(bpa, size,
-	    (flags & BUS_SPACE_MAP_CACHEABLE) != 0, bshp));
+	return x86_mem_add_mapping(bpa, size, flags, bshp);
 }
 
 int
@@ -286,8 +284,7 @@ bus_space_alloc(bus_space_tag_t t, bus_a
 	 * For memory space, map the bus physical address to
 	 * a kernel virtual address.
	 */
-	error = x86_mem_add_mapping(bpa, size,
-	    (flags & BUS_SPACE_MAP_CACHEABLE) != 0, bshp);
+	error = x86_mem_add_mapping(bpa, size, flags, bshp);
 	if (error) {
 		if (extent_free(iomem_ex, bpa, size, EX_NOWAIT |
 		    (ioport_malloc_safe ? EX_MALLOCOK : 0))) {
@@ -304,17 +301,20 @@ bus_space_alloc(bus_space_tag_t t, bus_a
 
 int
 x86_mem_add_mapping(bus_addr_t bpa, bus_size_t size,
-    int cacheable, bus_space_handle_t *bshp)
+    int flags, bus_space_handle_t *bshp)
 {
 	paddr_t pa, endpa;
 	vaddr_t va, sva;
-	u_int pmapflags = 0;
+	u_int pmapflags;
 
 	pa = x86_trunc_page(bpa);
 	endpa = x86_round_page(bpa + size);
 
-	if (!cacheable)
-		pmapflags |= PMAP_NOCACHE;
+	pmapflags = PMAP_NOCACHE;
+	if ((flags & BUS_SPACE_MAP_CACHEABLE) != 0)
+		pmapflags = 0;
+	else if ((flags & BUS_SPACE_MAP_PREFETCHABLE) != 0)
+		pmapflags = PMAP_WRITE_COMBINE;
 
 #ifdef DIAGNOSTIC
 	if (endpa != 0 && endpa <= pa)
diff -r b7b4ecb9c0cb -r d0d616f5cf8e sys/arch/x86/x86/cpu.c
--- a/sys/arch/x86/x86/cpu.c
+++ b/sys/arch/x86/x86/cpu.c
@@ -424,6 +424,7 @@ cpu_attach(device_t parent, device_t sel
 		panic("unknown processor type??\n");
 	}
 
+	pat_init(ci);
 	atomic_or_32(&cpus_attached, ci->ci_cpumask);
 
 	if (!pmf_device_register(self, cpu_suspend, cpu_resume))
diff -r b7b4ecb9c0cb -r d0d616f5cf8e sys/arch/x86/x86/pmap.c
--- a/sys/arch/x86/x86/pmap.c
+++ b/sys/arch/x86/x86/pmap.c
@@ -361,6 +361,20 @@ struct evcnt pmap_tlb_evcnt __aligned(64
 struct pmap_mbox pmap_mbox __aligned(64);
 
 /*
+ * PAT
+ */
+#define	PATENTRY(n, type)	((type) << ((n) * 8))
+#define	PAT_UC		0x0ULL
+#define	PAT_WC		0x1ULL
+#define	PAT_WT		0x4ULL
+#define	PAT_WP		0x5ULL
+#define	PAT_WB		0x6ULL
+#define	PAT_UCMINUS	0x7ULL
+
+static bool cpu_pat_enabled = false;
+
+
+/*
  * Per-CPU data.  The pmap mailbox is cache intensive so gets its
  * own line.  Note that the mailbox must be the first item.
  */
@@ -1004,6 +1018,57 @@ pmap_exec_fixup(struct vm_map *map, stru
 }
 #endif /* !defined(__x86_64__) */
 
+void
+pat_init(struct cpu_info *ci)
+{
+	uint64_t pat;
+
+	if (!(ci->ci_feat_val[0] & CPUID_PAT))
+		return;
+
+	/* We change WT to WC. Leave all other entries the default values. */
+	pat = PATENTRY(0, PAT_WB) | PATENTRY(1, PAT_WC) |
+	    PATENTRY(2, PAT_UCMINUS) | PATENTRY(3, PAT_UC) |
+	    PATENTRY(4, PAT_WB) | PATENTRY(5, PAT_WC) |
+	    PATENTRY(6, PAT_UCMINUS) | PATENTRY(7, PAT_UC);
+
+	wrmsr(MSR_CR_PAT, pat);
+	cpu_pat_enabled = true;
+	aprint_debug_dev(ci->ci_dev, "PAT enabled\n");
+}
+
+static pt_entry_t
+pmap_pat_flags(u_int flags)
+{
+	u_int cacheflags = (flags & PMAP_CACHE_MASK);
+
+	if (!cpu_pat_enabled) {
+		switch (cacheflags) {
+		case PMAP_NOCACHE:
+		case PMAP_NOCACHE_OVR:
+			/* results in PGC_UCMINUS on cpus which have
+			 * the cpuid PAT but PAT "disabled"
+			 */
+			return PG_N;
+		default:
+			return 0;
+		}
+	}
+
+	switch (cacheflags) {
+	case PMAP_NOCACHE:
+		return PGC_UC;
+	case PMAP_WRITE_COMBINE:
+		return PGC_WC;
+	case PMAP_WRITE_BACK:
+		return PGC_WB;
+	case PMAP_NOCACHE_OVR:
+		return PGC_UCMINUS;
+	}
+
+	return 0;
+}
+
 /*
  * p m a p   k e n t e r   f u n c t i o n s
  *
@@ -1041,8 +1106,7 @@ pmap_kenter_pa(vaddr_t va, paddr_t pa, v
 #endif /* DOM0OPS */
 	npte = pmap_pa2pte(pa);
 	npte |= protection_codes[prot] | PG_k | PG_V | pmap_pg_g;
-	if (flags & PMAP_NOCACHE)
-		npte |= PG_N;
+	npte |= pmap_pat_flags(flags);
 	opte = pmap_pte_testset(pte, npte); /* zap! */
 #if defined(DIAGNOSTIC)
 	/* XXX For now... */
@@ -3961,10 +4025,9 @@ pmap_enter_ma(struct pmap *pmap, vaddr_t
 #endif /* XEN */
 
 	npte = ma | protection_codes[prot] | PG_V;
+	npte |= pmap_pat_flags(flags);
 	if (wired)
 		npte |= PG_W;
-	if (flags & PMAP_NOCACHE)
-		npte |= PG_N;
 	if (va < VM_MAXUSER_ADDRESS)
 		npte |= PG_u;
 	else if (va < VM_MAX_ADDRESS)
diff -r b7b4ecb9c0cb -r d0d616f5cf8e sys/arch/xen/x86/cpu.c
--- a/sys/arch/xen/x86/cpu.c
+++ b/sys/arch/xen/x86/cpu.c
@@ -517,6 +517,7 @@ cpu_attach_common(device_t parent, devic
 		panic("unknown processor type??\n");
 	}
 
+	pat_init(ci);
 	atomic_or_32(&cpus_attached, ci->ci_cpumask);
 
 #if 0
diff -r b7b4ecb9c0cb -r d0d616f5cf8e sys/uvm/uvm_pmap.h
--- a/sys/uvm/uvm_pmap.h
+++ b/sys/uvm/uvm_pmap.h
@@ -109,7 +109,28 @@ typedef struct pmap_statistics *pmap_sta
 #define	PMAP_KMPAGE	0x00000000
 #endif /* PMAP_ENABLE_PMAP_KMPAGE */
 
-#define	PMAP_MD_MASK	0xff000000	/* Machine-dependent bits */
+#define	PMAP_MD_MASK	0xff000000	/* [BOTH] Machine-dependent bits */
+#define	PMAP_PROT_MASK	0x0000000f	/* [BOTH] VM_PROT_* bit mask */
+
+/*
+ * Cache Type Encodings
+ */
+#define	PMAP_CACHE_MASK		0x00000f00
+
+/* All accesses are uncacheable. No speculative accesses. */
+#define	PMAP_NOCACHE		0x00000100	/* [BOTH] */
+
+/* All accesses are uncacheable. No speculative accesses.
+ * Writes are combined. */
+#define	PMAP_WRITE_COMBINE	0x00000200	/* [BOTH] */
+
+/* On reads, cachelines become shared or exclusive if allocated on cache miss.
+ * On writes, cachelines become modified on a cache miss. */
+#define	PMAP_WRITE_BACK		0x00000300	/* [BOTH] */
+
+/* = PMAP_NOCACHE but overrideable (e.g. on x86 by MTRRs) */
+#define	PMAP_NOCACHE_OVR	0x00000400	/* [BOTH] */
+
 #ifndef PMAP_EXCLUDE_DECLS /* Used in Sparc port to virtualize pmap mod */
 #ifdef _KERNEL
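
To illustrate what the BUS_SPACE_MAP_PREFETCHABLE handling buys a
driver: a framebuffer BAR mapped prefetchable now ends up
write-combined via PMAP_WRITE_COMBINE instead of hard-uncached, so
consecutive pixel writes can be burst out. The fragment below is
illustrative only and not part of the patch; the function name and its
parameters are made up:

#include <sys/bus.h>

static int
example_map_framebuffer(bus_space_tag_t memt, bus_addr_t fb_base,
    bus_size_t fb_size, bus_space_handle_t *fbhp)
{

	/*
	 * Before this patch, anything mapped without
	 * BUS_SPACE_MAP_CACHEABLE came out hard-uncached (PG_N).
	 * With PAT enabled, PREFETCHABLE is translated by
	 * x86_mem_add_mapping() into PMAP_WRITE_COMBINE.
	 */
	return bus_space_map(memt, fb_base, fb_size,
	    BUS_SPACE_MAP_LINEAR | BUS_SPACE_MAP_PREFETCHABLE, fbhp);
}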