Originally based on a patch from Eric Biederman, but heavily changed.

The PAT MSR is programmed as below (entries 4-7 repeat entries 0-3):
PAT(0,WB) | PAT(1,WT) | PAT(2,WC) | PAT(3,UC)
So the only change from the boot setting is entry 2: UC_MINUS -> WC.

Also, PAT WC is enabled only on recent Intel CPUs. Other CPUs can be added as
they are tested with these patches.

Signed-off-by: Venkatesh Pallipadi <[EMAIL PROTECTED]>
Signed-off-by: Suresh Siddha <[EMAIL PROTECTED]>
Index: linux-2.6.git/arch/x86/mm/Makefile_64
===================================================================
--- linux-2.6.git.orig/arch/x86/mm/Makefile_64  2008-01-08 12:42:58.000000000 -0800
+++ linux-2.6.git/arch/x86/mm/Makefile_64       2008-01-08 12:43:13.000000000 -0800
@@ -2,7 +2,7 @@
 # Makefile for the linux x86_64-specific parts of the memory manager.
 #
 
-obj-y   := init_64.o fault_64.o ioremap_64.o extable.o pageattr_64.o mmap.o
+obj-y   := init_64.o fault_64.o ioremap_64.o extable.o pageattr_64.o mmap.o pat.o
 obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
 obj-$(CONFIG_NUMA) += numa_64.o
 obj-$(CONFIG_K8_NUMA) += k8topology_64.o
Index: linux-2.6.git/arch/x86/mm/pat.c
===================================================================
--- /dev/null   1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.git/arch/x86/mm/pat.c     2008-01-08 12:43:13.000000000 -0800
@@ -0,0 +1,70 @@
+/* Handle caching attributes in page tables (PAT) */
+#include <linux/mm.h>
+#include <linux/kernel.h>
+#include <linux/gfp.h>
+#include <asm/msr.h>
+#include <asm/tlbflush.h>
+#include <asm/processor.h>
+
+static u64 boot_pat_state;
+int pat_wc_enabled = 0;
+
+enum {
+       PAT_UC = 0,     /* uncached */
+       PAT_WC = 1,             /* Write combining */
+       PAT_WT = 4,             /* Write Through */
+       PAT_WP = 5,             /* Write Protected */
+       PAT_WB = 6,             /* Write Back (default) */
+       PAT_UC_MINUS = 7,       /* UC, but can be overridden by MTRR */
+};
+
+#define PAT(x,y) ((u64)PAT_ ## y << ((x)*8))
+
+static int pat_known_cpu(void)
+{
+       if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
+           (boot_cpu_data.x86 == 0xF ||
+            (boot_cpu_data.x86 == 0x6 && boot_cpu_data.x86_model >= 0x15)))
+               return cpu_has_pat;
+
+       return 0;
+}
+
+void pat_init(void)
+{
+       u64 pat;
+       if (!smp_processor_id() && !pat_known_cpu())
+               return;
+
+       if (smp_processor_id() && !pat_wc_enabled)
+               return;
+
+       /* Set PCD alone to Write-Combining. All other bits stay the same */
+       /* PTE encoding used in Linux:
+             PAT
+             |PCD
+             ||PWT
+             |||
+             000 WB         default
+             010 WC         _PAGE_WC
+             011 UC         _PAGE_PCD
+               PAT bit unused */
+       pat = PAT(0,WB) | PAT(1,WT) | PAT(2,WC) | PAT(3,UC) |
+             PAT(4,WB) | PAT(5,WT) | PAT(6,WC) | PAT(7,UC);
+
+       if (!pat_wc_enabled) {
+               rdmsrl(MSR_IA32_CR_PAT, boot_pat_state);
+               pat_wc_enabled = 1;
+       }
+
+       wrmsrl(MSR_IA32_CR_PAT, pat);
+       printk("cpu %d, old 0x%Lx, new 0x%Lx\n", smp_processor_id(),
+                       boot_pat_state, pat);
+}
+
+#undef PAT
+
+void pat_shutdown(void)
+{
+}
+
Index: linux-2.6.git/arch/x86/pci/i386.c
===================================================================
--- linux-2.6.git.orig/arch/x86/pci/i386.c      2008-01-08 12:42:58.000000000 -0800
+++ linux-2.6.git/arch/x86/pci/i386.c   2008-01-08 12:43:13.000000000 -0800
@@ -301,7 +301,6 @@
                        enum pci_mmap_state mmap_state, int write_combine)
 {
        unsigned long prot;
-
        /* I/O space cannot be accessed via normal processor loads and
         * stores on this platform.
         */
@@ -311,14 +310,25 @@
        /* Leave vm_pgoff as-is, the PCI space address is the physical
         * address on this platform.
         */
-       prot = pgprot_val(vma->vm_page_prot);
-       if (boot_cpu_data.x86 > 3)
-               prot |= _PAGE_PCD | _PAGE_PWT;
-       vma->vm_page_prot = __pgprot(prot);
+       if (pat_wc_enabled) {
+               if (write_combine) {
+                       vma->vm_page_prot =
+                               pgprot_writecombine(vma->vm_page_prot);
+               } else {
+                       vma->vm_page_prot =
+                               pgprot_noncached(vma->vm_page_prot);
+               }
+       } else {
+               /* Write-combine setting is ignored, it is changed via the mtrr
+                * interfaces on this platform.
+                */
+               prot = pgprot_val(vma->vm_page_prot);
+               if (boot_cpu_data.x86 > 3) {
+                       prot |= _PAGE_PCD;
+               }
+               vma->vm_page_prot = __pgprot(prot);
+       }
 
-       /* Write-combine setting is ignored, it is changed via the mtrr
-        * interfaces on this platform.
-        */
        if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
                               vma->vm_end - vma->vm_start,
                               vma->vm_page_prot))
Index: linux-2.6.git/include/asm-x86/cpufeature.h
===================================================================
--- linux-2.6.git.orig/include/asm-x86/cpufeature.h     2008-01-08 12:42:58.000000000 -0800
+++ linux-2.6.git/include/asm-x86/cpufeature.h  2008-01-08 12:43:13.000000000 -0800
@@ -167,6 +167,7 @@
 #define cpu_has_pebs           boot_cpu_has(X86_FEATURE_PEBS)
 #define cpu_has_clflush                boot_cpu_has(X86_FEATURE_CLFLSH)
 #define cpu_has_bts            boot_cpu_has(X86_FEATURE_BTS)
+#define cpu_has_pat            boot_cpu_has(X86_FEATURE_PAT)
 
 #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64)
 # define cpu_has_invlpg                1
Index: linux-2.6.git/include/asm-x86/msr-index.h
===================================================================
--- linux-2.6.git.orig/include/asm-x86/msr-index.h      2008-01-08 12:42:58.000000000 -0800
+++ linux-2.6.git/include/asm-x86/msr-index.h   2008-01-08 12:43:13.000000000 -0800
@@ -70,6 +70,7 @@
 #define DEBUGCTLMSR_LBR                (1UL << _DEBUGCTLMSR_LBR)
 #define DEBUGCTLMSR_BTF                (1UL << _DEBUGCTLMSR_BTF)
 
+#define MSR_IA32_CR_PAT                        0x00000277
 #define MSR_IA32_MC0_CTL               0x00000400
 #define MSR_IA32_MC0_STATUS            0x00000401
 #define MSR_IA32_MC0_ADDR              0x00000402
Index: linux-2.6.git/include/asm-x86/pgtable_64.h
===================================================================
--- linux-2.6.git.orig/include/asm-x86/pgtable_64.h     2008-01-08 12:42:58.000000000 -0800
+++ linux-2.6.git/include/asm-x86/pgtable_64.h  2008-01-08 12:43:13.000000000 -0800
@@ -158,7 +158,7 @@
 #define _PAGE_RW       (_AC(1, UL)<<_PAGE_BIT_RW)
 #define _PAGE_USER     (_AC(1, UL)<<_PAGE_BIT_USER)
 #define _PAGE_PWT      (_AC(1, UL)<<_PAGE_BIT_PWT)
-#define _PAGE_PCD      (_AC(1, UL)<<_PAGE_BIT_PCD)
+#define _PAGE_PCD      ((_AC(1, UL)<<_PAGE_BIT_PCD) | _PAGE_PWT)
 #define _PAGE_ACCESSED (_AC(1, UL)<<_PAGE_BIT_ACCESSED)
 #define _PAGE_DIRTY    (_AC(1, UL)<<_PAGE_BIT_DIRTY)
 /* 2MB page */
@@ -167,6 +167,10 @@
 #define _PAGE_FILE     (_AC(1, UL)<<_PAGE_BIT_FILE)
 /* Global TLB entry */
 #define _PAGE_GLOBAL   (_AC(1, UL)<<_PAGE_BIT_GLOBAL)
+/* The PCD bit on its own now selects write combining. PAT bit is not used */
+#define _PAGE_WC       (_AC(1, UL)<<_PAGE_BIT_PCD)
+
+#define _PAGE_CACHE_MASK       (_PAGE_PCD)
 
 #define _PAGE_PROTNONE 0x080   /* If not present */
 #define _PAGE_NX        (_AC(1, UL)<<_PAGE_BIT_NX)
@@ -189,13 +193,15 @@
 #define __PAGE_KERNEL_EXEC \
        (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
 #define __PAGE_KERNEL_NOCACHE \
-       (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_PCD | _PAGE_PWT | _PAGE_ACCESSED | _PAGE_NX)
+       (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_PCD | _PAGE_ACCESSED | _PAGE_NX)
+#define __PAGE_KERNEL_WC \
+       (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_WC | _PAGE_ACCESSED | _PAGE_NX)
 #define __PAGE_KERNEL_RO \
        (_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX)
 #define __PAGE_KERNEL_VSYSCALL \
        (_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
 #define __PAGE_KERNEL_VSYSCALL_NOCACHE \
-       (_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_PCD | _PAGE_PWT)
+       (_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_PCD)
 #define __PAGE_KERNEL_LARGE \
        (__PAGE_KERNEL | _PAGE_PSE)
 #define __PAGE_KERNEL_LARGE_EXEC \
@@ -207,6 +213,7 @@
 #define PAGE_KERNEL_EXEC MAKE_GLOBAL(__PAGE_KERNEL_EXEC)
 #define PAGE_KERNEL_RO MAKE_GLOBAL(__PAGE_KERNEL_RO)
 #define PAGE_KERNEL_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_NOCACHE)
+#define PAGE_KERNEL_WC MAKE_GLOBAL(__PAGE_KERNEL_WC)
 #define PAGE_KERNEL_VSYSCALL32 __pgprot(__PAGE_KERNEL_VSYSCALL)
 #define PAGE_KERNEL_VSYSCALL MAKE_GLOBAL(__PAGE_KERNEL_VSYSCALL)
 #define PAGE_KERNEL_LARGE MAKE_GLOBAL(__PAGE_KERNEL_LARGE)
@@ -302,8 +309,24 @@
 
 /*
  * Macro to mark a page protection value as "uncacheable".
+ * Accesses through an uncached translation bypass the cache
+ * and do not allow consecutive writes to be combined.
  */
-#define pgprot_noncached(prot) (__pgprot(pgprot_val(prot) | _PAGE_PCD | _PAGE_PWT))
+#define pgprot_noncached(prot) \
+       __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_MASK) | _PAGE_PCD)
+
+/*
+ * Macro to mark a page protection value as "write-combining".
+ * Accesses through a write-combining translation bypass the
+ * caches, but do allow consecutive writes to be combined into
+ * single (but larger) write transactions.
+ * This is mostly useful for IO accesses, for memory it is often slower.
+ * It also implies uncached.
+ */
+#define pgprot_writecombine(prot) \
+       __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_MASK) | _PAGE_WC)
+
+#define pgprot_nonstd(prot) (pgprot_val(prot) & _PAGE_CACHE_MASK)
 
 static inline int pmd_large(pmd_t pte) { 
        return (pmd_val(pte) & __LARGE_PTE) == __LARGE_PTE; 
@@ -417,6 +440,7 @@
 #define pgtable_cache_init()   do { } while (0)
 #define check_pgt_cache()      do { } while (0)
 
+/* AGP users use MTRRs for now. Need to add an ioctl to agpgart for WC */
 #define PAGE_AGP    PAGE_KERNEL_NOCACHE
 #define HAVE_PAGE_AGP 1
 
Index: linux-2.6.git/arch/x86/kernel/cpu/mtrr/generic.c
===================================================================
--- linux-2.6.git.orig/arch/x86/kernel/cpu/mtrr/generic.c       2008-01-08 12:42:58.000000000 -0800
+++ linux-2.6.git/arch/x86/kernel/cpu/mtrr/generic.c    2008-01-08 12:43:13.000000000 -0800
@@ -79,19 +79,23 @@
                        base, base + step - 1, mtrr_attrib_to_str(*types));
 }
 
+static void prepare_set(void);
+static void post_set(void);
+
 /*  Grab all of the MTRR state for this CPU into *state  */
 void __init get_mtrr_state(void)
 {
        unsigned int i;
        struct mtrr_var_range *vrs;
        unsigned lo, dummy;
+       unsigned long flags;
 
        if (!mtrr_state.var_ranges) {
-               mtrr_state.var_ranges = kmalloc(num_var_ranges * sizeof (struct mtrr_var_range), 
+               mtrr_state.var_ranges = kmalloc(num_var_ranges * sizeof (struct mtrr_var_range),
                                                GFP_KERNEL);
                if (!mtrr_state.var_ranges)
                        return;
-       } 
+       }
        vrs = mtrr_state.var_ranges;
 
        rdmsr(MTRRcap_MSR, lo, dummy);
@@ -137,6 +141,16 @@
                                printk(KERN_INFO "MTRR %u disabled\n", i);
                }
        }
+
+       /* PAT setup for the boot CPU. We need to go through the sync steps here */
+       local_irq_save(flags);
+       prepare_set();
+
+       pat_init();
+
+       post_set();
+       local_irq_restore(flags);
+
 }
 
 /*  Some BIOS's are fucked and don't set all MTRRs the same!  */
@@ -399,6 +413,9 @@
        /* Actually set the state */
        mask = set_mtrr_state();
 
+       /* also set PAT */
+       pat_init();
+
        post_set();
        local_irq_restore(flags);
 
Index: linux-2.6.git/arch/x86/mm/Makefile_32
===================================================================
--- linux-2.6.git.orig/arch/x86/mm/Makefile_32  2008-01-08 12:42:58.000000000 -0800
+++ linux-2.6.git/arch/x86/mm/Makefile_32       2008-01-08 12:43:13.000000000 -0800
@@ -2,7 +2,7 @@
 # Makefile for the linux i386-specific parts of the memory manager.
 #
 
-obj-y  := init_32.o pgtable_32.o fault_32.o ioremap_32.o extable.o pageattr_32.o mmap.o
+obj-y  := init_32.o pgtable_32.o fault_32.o ioremap_32.o extable.o pageattr_32.o mmap.o pat.o
 
 obj-$(CONFIG_NUMA) += discontig_32.o
 obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
Index: linux-2.6.git/include/asm-x86/pgtable_32.h
===================================================================
--- linux-2.6.git.orig/include/asm-x86/pgtable_32.h     2008-01-08 12:42:58.000000000 -0800
+++ linux-2.6.git/include/asm-x86/pgtable_32.h  2008-01-08 12:43:36.000000000 -0800
@@ -107,7 +107,7 @@
 #define _PAGE_RW       0x002
 #define _PAGE_USER     0x004
 #define _PAGE_PWT      0x008
-#define _PAGE_PCD      0x010
+#define _PAGE_PCD      0x018
 #define _PAGE_ACCESSED 0x020
 #define _PAGE_DIRTY    0x040
 #define _PAGE_PSE      0x080   /* 4 MB (or 2MB) page, Pentium+, if present.. */
@@ -116,6 +116,12 @@
 #define _PAGE_UNUSED2  0x400
 #define _PAGE_UNUSED3  0x800
 
+/* The PCD bit on its own now selects write combining. PAT bit is not used */
+
+#define _PAGE_WC       0x10
+
+#define _PAGE_CACHE_MASK       0x18
+
 /* If _PAGE_PRESENT is clear, we use these: */
 #define _PAGE_FILE     0x040   /* nonlinear file mapping, saved PTE; unset:swap */
 #define _PAGE_PROTNONE 0x080   /* if the user mapped it with PROT_NONE;
@@ -156,7 +162,7 @@
 extern unsigned long long __PAGE_KERNEL, __PAGE_KERNEL_EXEC;
 #define __PAGE_KERNEL_RO               (__PAGE_KERNEL & ~_PAGE_RW)
 #define __PAGE_KERNEL_RX               (__PAGE_KERNEL_EXEC & ~_PAGE_RW)
-#define __PAGE_KERNEL_NOCACHE          (__PAGE_KERNEL | _PAGE_PCD | _PAGE_PWT)
+#define __PAGE_KERNEL_NOCACHE          (__PAGE_KERNEL | _PAGE_PCD)
 #define __PAGE_KERNEL_LARGE            (__PAGE_KERNEL | _PAGE_PSE)
 #define __PAGE_KERNEL_LARGE_EXEC       (__PAGE_KERNEL_EXEC | _PAGE_PSE)
 
@@ -358,7 +364,18 @@
  * it, this is a no-op.
  */
 #define pgprot_noncached(prot) ((boot_cpu_data.x86 > 3)                                          \
-                                ? (__pgprot(pgprot_val(prot) | _PAGE_PCD | _PAGE_PWT)) : (prot))
+                                ? (__pgprot(pgprot_val(prot) | _PAGE_PCD)) : (prot))
+
+/*
+ * Macro to mark a page protection value as "write-combining".
+ * Accesses through a write-combining translation bypass the
+ * caches, but do allow consecutive writes to be combined into
+ * single (but larger) write transactions.
+ * This is mostly useful for IO accesses, for memory it is often slower.
+ * It also implies uncached.
+ */
+#define pgprot_writecombine(prot) \
+       __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_MASK) | _PAGE_WC)
 
 /*
  * Conversion functions: convert a page and protection to a page entry,
Index: linux-2.6.git/include/asm-x86/processor.h
===================================================================
--- linux-2.6.git.orig/include/asm-x86/processor.h      2008-01-08 12:42:58.000000000 -0800
+++ linux-2.6.git/include/asm-x86/processor.h   2008-01-08 12:43:13.000000000 -0800
@@ -647,6 +647,10 @@
 
 extern int force_mwait;
 
+extern int pat_wc_enabled;
+extern void pat_init(void);
+extern void pat_shutdown(void);
+
 extern void select_idle_routine(const struct cpuinfo_x86 *c);
 
 extern unsigned long boot_option_idle_override;
Index: linux-2.6.git/include/asm-generic/pgtable.h
===================================================================
--- linux-2.6.git.orig/include/asm-generic/pgtable.h    2008-01-08 12:42:58.000000000 -0800
+++ linux-2.6.git/include/asm-generic/pgtable.h 2008-01-08 12:43:13.000000000 -0800
@@ -154,6 +154,10 @@
 })
 #endif
 
+#ifndef pgprot_writecombine
+#define pgprot_writecombine pgprot_noncached
+#endif
+
 /*
  * When walking page tables, we usually want to skip any p?d_none entries;
  * and any p?d_bad entries - reporting the error before resetting to none.
Index: linux-2.6.git/arch/x86/mm/ioremap_32.c
===================================================================
--- linux-2.6.git.orig/arch/x86/mm/ioremap_32.c 2008-01-08 12:42:58.000000000 -0800
+++ linux-2.6.git/arch/x86/mm/ioremap_32.c      2008-01-08 12:43:13.000000000 -0800
@@ -119,7 +119,7 @@
 void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size)
 {
        unsigned long last_addr;
-       void __iomem *p = __ioremap(phys_addr, size, _PAGE_PCD | _PAGE_PWT);
+       void __iomem *p = __ioremap(phys_addr, size, _PAGE_PCD);
        if (!p) 
                return p; 
 
Index: linux-2.6.git/arch/x86/mm/ioremap_64.c
===================================================================
--- linux-2.6.git.orig/arch/x86/mm/ioremap_64.c 2008-01-08 12:42:58.000000000 -0800
+++ linux-2.6.git/arch/x86/mm/ioremap_64.c      2008-01-08 12:43:13.000000000 -0800
@@ -138,7 +138,7 @@
 
 void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size)
 {
-       return __ioremap(phys_addr, size, _PAGE_PCD | _PAGE_PWT);
+       return __ioremap(phys_addr, size, _PAGE_PCD);
 }
 EXPORT_SYMBOL(ioremap_nocache);
 

-- 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to