Linus,

Please pull the latest x86-boot-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-boot-for-linus

   # HEAD: 5773ebfee729acf93b330664eab4c8d77edc2193 x86/kconfig: Remove misleading note regarding hibernation and KASLR

Misc updates: an e820 error-handling fix, conversion of the 32-bit page
table setup code from assembly to C, a KASLR fix for memmap= collisions
seen in kexec environments, plus small cleanups.


 Thanks,

        Ingo

------------------>
Arnd Bergmann (1):
      x86/e820/32: Fix e820_search_gap() error handling on x86-32

Boris Ostrovsky (1):
      x86/boot/32: Convert the 32-bit pgtable setup code from assembly to C

Dave Jiang (1):
      x86/boot: Fix KASLR and memmap= collision

Niklas Cassel (1):
      x86/kconfig: Remove misleading note regarding hibernation and KASLR

Wei Yang (1):
      x86/e820: Make e820_search_gap() static and remove unused variables


 arch/x86/Kconfig                  |   4 --
 arch/x86/boot/boot.h              |   1 +
 arch/x86/boot/compressed/kaslr.c  | 140 +++++++++++++++++++++++++++++++++++++-
 arch/x86/boot/string.c            |  13 ++++
 arch/x86/include/asm/e820.h       |   2 -
 arch/x86/include/asm/pgtable_32.h |  32 +++++++++
 arch/x86/kernel/e820.c            |  22 +++---
 arch/x86/kernel/head32.c          |  62 +++++++++++++++++
 arch/x86/kernel/head_32.S         | 121 ++------------------------------
 9 files changed, 261 insertions(+), 136 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e487493bbd47..4e6dbca03aed 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1994,10 +1994,6 @@ config RANDOMIZE_BASE
          theoretically possible, but the implementations are further
          limited due to memory layouts.
 
-         If CONFIG_HIBERNATE is also enabled, KASLR is disabled at boot
-         time. To enable it, boot with "kaslr" on the kernel command
-         line (which will also disable hibernation).
-
          If unsure, say N.
 
 # Relocation on x86 needs some additional build support
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index e5612f3e3b57..9b42b6d1e902 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -333,6 +333,7 @@ size_t strnlen(const char *s, size_t maxlen);
 unsigned int atou(const char *s);
 unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base);
 size_t strlen(const char *s);
+char *strchr(const char *s, int c);
 
 /* tty.c */
 void puts(const char *);
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index a66854d99ee1..8b7c9e75edcb 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -11,6 +11,7 @@
  */
 #include "misc.h"
 #include "error.h"
+#include "../boot.h"
 
 #include <generated/compile.h>
 #include <linux/module.h>
@@ -52,15 +53,22 @@ static unsigned long get_boot_seed(void)
 #include "../../lib/kaslr.c"
 
 struct mem_vector {
-       unsigned long start;
-       unsigned long size;
+       unsigned long long start;
+       unsigned long long size;
 };
 
+/* Only supporting at most 4 unusable memmap regions with kaslr */
+#define MAX_MEMMAP_REGIONS     4
+
+static bool memmap_too_large;
+
 enum mem_avoid_index {
        MEM_AVOID_ZO_RANGE = 0,
        MEM_AVOID_INITRD,
        MEM_AVOID_CMDLINE,
        MEM_AVOID_BOOTPARAMS,
+       MEM_AVOID_MEMMAP_BEGIN,
+       MEM_AVOID_MEMMAP_END = MEM_AVOID_MEMMAP_BEGIN + MAX_MEMMAP_REGIONS - 1,
        MEM_AVOID_MAX,
 };
 
@@ -77,6 +85,123 @@ static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two)
        return true;
 }
 
+/**
+ *     _memparse - Parse a string with mem suffixes into a number
+ *     @ptr: Where parse begins
+ *     @retptr: (output) Optional pointer to next char after parse completes
+ *
+ *     Parses a string into a number.  The number stored at @ptr is
+ *     potentially suffixed with K, M, G, T, P, E.
+ */
+static unsigned long long _memparse(const char *ptr, char **retptr)
+{
+       char *endptr;   /* Local pointer to end of parsed string */
+
+       unsigned long long ret = simple_strtoull(ptr, &endptr, 0);
+
+       switch (*endptr) {
+       case 'E':
+       case 'e':
+               ret <<= 10;
+       case 'P':
+       case 'p':
+               ret <<= 10;
+       case 'T':
+       case 't':
+               ret <<= 10;
+       case 'G':
+       case 'g':
+               ret <<= 10;
+       case 'M':
+       case 'm':
+               ret <<= 10;
+       case 'K':
+       case 'k':
+               ret <<= 10;
+               endptr++;
+       default:
+               break;
+       }
+
+       if (retptr)
+               *retptr = endptr;
+
+       return ret;
+}
+
+static int
+parse_memmap(char *p, unsigned long long *start, unsigned long long *size)
+{
+       char *oldp;
+
+       if (!p)
+               return -EINVAL;
+
+       /* We don't care about this option here */
+       if (!strncmp(p, "exactmap", 8))
+               return -EINVAL;
+
+       oldp = p;
+       *size = _memparse(p, &p);
+       if (p == oldp)
+               return -EINVAL;
+
+       switch (*p) {
+       case '@':
+               /* Skip this region, usable */
+               *start = 0;
+               *size = 0;
+               return 0;
+       case '#':
+       case '$':
+       case '!':
+               *start = _memparse(p + 1, &p);
+               return 0;
+       }
+
+       return -EINVAL;
+}
+
+static void mem_avoid_memmap(void)
+{
+       char arg[128];
+       int rc;
+       int i;
+       char *str;
+
+       /* See if we have any memmap areas */
+       rc = cmdline_find_option("memmap", arg, sizeof(arg));
+       if (rc <= 0)
+               return;
+
+       i = 0;
+       str = arg;
+       while (str && (i < MAX_MEMMAP_REGIONS)) {
+               int rc;
+               unsigned long long start, size;
+               char *k = strchr(str, ',');
+
+               if (k)
+                       *k++ = 0;
+
+               rc = parse_memmap(str, &start, &size);
+               if (rc < 0)
+                       break;
+               str = k;
+               /* A usable region that should not be skipped */
+               if (size == 0)
+                       continue;
+
+               mem_avoid[MEM_AVOID_MEMMAP_BEGIN + i].start = start;
+               mem_avoid[MEM_AVOID_MEMMAP_BEGIN + i].size = size;
+               i++;
+       }
+
+       /* More than 4 memmaps, fail kaslr */
+       if ((i >= MAX_MEMMAP_REGIONS) && str)
+               memmap_too_large = true;
+}
+
 /*
  * In theory, KASLR can put the kernel anywhere in the range of [16M, 64T).
  * The mem_avoid array is used to store the ranges that need to be avoided
@@ -197,6 +322,9 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size,
 
        /* We don't need to set a mapping for setup_data. */
 
+       /* Mark the memmap regions we need to avoid */
+       mem_avoid_memmap();
+
 #ifdef CONFIG_X86_VERBOSE_BOOTUP
        /* Make sure video RAM can be used. */
        add_identity_map(0, PMD_SIZE);
@@ -379,6 +507,12 @@ static unsigned long find_random_phys_addr(unsigned long minimum,
        int i;
        unsigned long addr;
 
+       /* Check if we had too many memmaps. */
+       if (memmap_too_large) {
+               debug_putstr("Aborted e820 scan (more than 4 memmap= args)!\n");
+               return 0;
+       }
+
        /* Make sure minimum is aligned. */
        minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN);
 
@@ -456,7 +590,7 @@ void choose_random_location(unsigned long input,
        /* Walk e820 and find a random address. */
        random_addr = find_random_phys_addr(min_addr, output_size);
        if (!random_addr) {
-               warn("KASLR disabled: could not find suitable E820 region!");
+               warn("Physical KASLR disabled: no suitable memory region!");
        } else {
                /* Update the new physical address location. */
                if (*output != random_addr) {
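
As a side note on the _memparse() fall-through above: each suffix case
deliberately falls into the next, so a 'G' accumulates three 10-bit
shifts (G, M, K). A stand-alone user-space sketch of that structure,
trimmed to K/M/G (the boot version also handles T/P/E), purely for
illustration and not the boot-stub code itself:

#include <stdio.h>
#include <stdlib.h>

static unsigned long long memparse_demo(const char *ptr, char **retptr)
{
	char *endptr;
	unsigned long long ret = strtoull(ptr, &endptr, 0);

	switch (*endptr) {
	case 'G': case 'g':
		ret <<= 10;	/* fall through */
	case 'M': case 'm':
		ret <<= 10;	/* fall through */
	case 'K': case 'k':
		ret <<= 10;
		endptr++;
	default:
		break;
	}

	if (retptr)
		*retptr = endptr;
	return ret;
}

int main(void)
{
	char *next;

	/* "64M" -> 67108864; in "memmap=64M$0x10000000" the '$' then
	   marks the region reserved and its start address follows. */
	printf("%llu\n", memparse_demo("64M", &next));
	return 0;
}
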
diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c
index 9e240fcba784..5457b02fc050 100644
--- a/arch/x86/boot/string.c
+++ b/arch/x86/boot/string.c
@@ -156,3 +156,16 @@ char *strstr(const char *s1, const char *s2)
        }
        return NULL;
 }
+
+/**
+ * strchr - Find the first occurrence of the character c in the string s.
+ * @s: the string to be searched
+ * @c: the character to search for
+ */
+char *strchr(const char *s, int c)
+{
+       while (*s != (char)c)
+               if (*s++ == '\0')
+                       return NULL;
+       return (char *)s;
+}
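
The strchr() addition exists because the decompression stub has no C
library; mem_avoid_memmap() uses it to split a comma-separated memmap=
string. A quick user-space model of that tokenizing loop (the memmap
value below is made up):

#include <stdio.h>
#include <string.h>

int main(void)
{
	char arg[] = "2M@8M,4M$1G,16K#0x100000";	/* hypothetical memmap= value */
	char *str = arg;

	while (str) {
		char *k = strchr(str, ',');

		if (k)
			*k++ = '\0';	/* terminate token, step past ',' */
		printf("region: %s\n", str);
		str = k;
	}
	return 0;
}
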
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index ec23d8e1297c..67313f3a9874 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -30,8 +30,6 @@ extern u64 e820_remove_range(u64 start, u64 size, unsigned old_type,
                             int checktype);
 extern void update_e820(void);
 extern void e820_setup_gap(void);
-extern int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize,
-                       unsigned long start_addr, unsigned long long end_addr);
 struct setup_data;
 extern void parse_e820_ext(u64 phys_addr, u32 data_len);
 
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index b6c0b404898a..fbc73360aea0 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -27,6 +27,7 @@ struct vm_area_struct;
 
 extern pgd_t swapper_pg_dir[1024];
 extern pgd_t initial_page_table[1024];
+extern pmd_t initial_pg_pmd[];
 
 static inline void pgtable_cache_init(void) { }
 static inline void check_pgt_cache(void) { }
@@ -75,4 +76,35 @@ do {                                         \
 #define kern_addr_valid(kaddr) (0)
 #endif
 
+/*
+ * This is how much memory in addition to the memory covered up to
+ * and including _end we need mapped initially.
+ * We need:
+ *     (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE)
+ *     (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE)
+ *
+ * Modulo rounding, each megabyte assigned here requires a kilobyte of
+ * memory, which is currently unreclaimed.
+ *
+ * This should be a multiple of a page.
+ *
+ * KERNEL_IMAGE_SIZE should be greater than pa(_end)
+ * and smaller than max_low_pfn, otherwise some page table entries will be wasted
+ */
+#if PTRS_PER_PMD > 1
+#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD)
+#else
+#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)
+#endif
+
+/*
+ * Number of possible pages in the lowmem region.
+ *
+ * We shift 2 by 31 instead of 1 by 32 to the left in order to avoid a
+ * gas warning about overflowing shift count when gas has been compiled
+ * with only a host target support using a 32-bit type for internal
+ * representation.
+ */
+#define LOWMEM_PAGES ((((2<<31) - __PAGE_OFFSET) >> PAGE_SHIFT))
+
 #endif /* _ASM_X86_PGTABLE_32_H */
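
To sanity-check the arithmetic these macros encode, assume the common
3G/1G VMSPLIT (__PAGE_OFFSET == 0xC0000000, PAGE_SHIFT == 12), PAE's
PTRS_PER_PMD == 512 with PTRS_PER_PGD == 4, and non-PAE's PTRS_PER_PGD
== 1024; these are example values, not something the patch changes:

#include <stdio.h>

int main(void)
{
	unsigned long long page_offset = 0xC0000000ull;
	/* LOWMEM_PAGES: 4 GiB minus __PAGE_OFFSET, in 4 KiB pages */
	unsigned long lowmem_pages = ((2ull << 31) - page_offset) >> 12;
	unsigned long pae_pt_pages = lowmem_pages / 512 + 4;
	unsigned long nonpae_pt_pages = lowmem_pages / 1024;

	printf("lowmem pages:     %lu\n", lowmem_pages);	/* 262144 (1 GiB) */
	printf("PAE pt pages:     %lu\n", pae_pt_pages);	/* 516 */
	printf("non-PAE pt pages: %lu\n", nonpae_pt_pages);	/* 256 */
	return 0;
}
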
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 90e8dde3ec26..b2bbad6ebe4d 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -580,24 +580,19 @@ static void __init update_e820_saved(void)
 }
 #define MAX_GAP_END 0x100000000ull
 /*
- * Search for a gap in the e820 memory space from start_addr to end_addr.
+ * Search for a gap in the e820 memory space from 0 to MAX_GAP_END.
  */
-__init int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize,
-               unsigned long start_addr, unsigned long long end_addr)
+static int __init e820_search_gap(unsigned long *gapstart,
+               unsigned long *gapsize)
 {
-       unsigned long long last;
+       unsigned long long last = MAX_GAP_END;
        int i = e820->nr_map;
        int found = 0;
 
-       last = (end_addr && end_addr < MAX_GAP_END) ? end_addr : MAX_GAP_END;
-
        while (--i >= 0) {
                unsigned long long start = e820->map[i].addr;
                unsigned long long end = start + e820->map[i].size;
 
-               if (end < start_addr)
-                       continue;
-
                /*
                 * Since "last" is at most 4GB, we know we'll
                 * fit in 32 bits if this condition is true
@@ -628,18 +623,19 @@ __init void e820_setup_gap(void)
        unsigned long gapstart, gapsize;
        int found;
 
-       gapstart = 0x10000000;
        gapsize = 0x400000;
-       found  = e820_search_gap(&gapstart, &gapsize, 0, MAX_GAP_END);
+       found  = e820_search_gap(&gapstart, &gapsize);
 
-#ifdef CONFIG_X86_64
        if (!found) {
+#ifdef CONFIG_X86_64
                gapstart = (max_pfn << PAGE_SHIFT) + 1024*1024;
                printk(KERN_ERR
        "e820: cannot find a gap in the 32bit address range\n"
        "e820: PCI devices with unassigned 32bit BARs may break!\n");
-       }
+#else
+               gapstart = 0x10000000;
 #endif
+       }
 
        /*
         * e820_reserve_resources_late protect stolen RAM already
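
With the start_addr/end_addr parameters gone, e820_search_gap() always
scans [0, MAX_GAP_END) from the top of the map downward. A simplified
toy model of that walk (the three-entry map is invented, and the
comparison is a sketch rather than a verbatim copy of the kernel
function):

#include <stdio.h>

struct region { unsigned long long addr, size; };

int main(void)
{
	struct region map[] = {			/* hypothetical e820 layout */
		{ 0x0,        0xa0000    },
		{ 0x100000,   0x7ff00000 },	/* ~2 GiB of RAM at 1 MiB */
		{ 0xfec00000, 0x1400000  },	/* chipset space below 4 GiB */
	};
	unsigned long long last = 0x100000000ull;	/* MAX_GAP_END */
	unsigned long gapstart = 0, gapsize = 0x400000;	/* 4 MiB minimum */
	int found = 0;
	int i = sizeof(map) / sizeof(map[0]);

	while (--i >= 0) {
		unsigned long long start = map[i].addr;
		unsigned long long end = start + map[i].size;

		/* Gap between this region's end and the one above it? */
		if (last > end && last - end > gapsize) {
			gapsize = last - end;
			gapstart = end;
			found = 1;
		}
		if (start < last)
			last = start;
	}

	if (found)	/* expect ~2028 MiB at 0x80000000 here */
		printf("gap: %lu MiB at %#lx\n", gapsize >> 20, gapstart);
	return 0;
}
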
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index f16c55bfc090..e5fb436a6548 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -49,3 +49,65 @@ asmlinkage __visible void __init i386_start_kernel(void)
 
        start_kernel();
 }
+
+/*
+ * Initialize page tables.  This creates a PDE and a set of page
+ * tables, which are located immediately beyond __brk_base.  The variable
+ * _brk_end is set up to point to the first "safe" location.
+ * Mappings are created both at virtual address 0 (identity mapping)
+ * and PAGE_OFFSET for up to _end.
+ *
+ * In PAE mode initial_page_table is statically defined to contain
+ * enough entries to cover the VMSPLIT option (that is the top 1, 2 or 3
+ * entries). The identity mapping is handled by pointing two PGD entries
+ * to the first kernel PMD. Note the upper half of each PMD or PTE are
+ * always zero at this stage.
+ */
+void __init mk_early_pgtbl_32(void)
+{
+#ifdef __pa
+#undef __pa
+#endif
+#define __pa(x)  ((unsigned long)(x) - PAGE_OFFSET)
+       pte_t pte, *ptep;
+       int i;
+       unsigned long *ptr;
+       /* Enough space to fit pagetables for the low memory linear map */
+       const unsigned long limit = __pa(_end) +
+               (PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT);
+#ifdef CONFIG_X86_PAE
+       pmd_t pl2, *pl2p = (pmd_t *)__pa(initial_pg_pmd);
+#define SET_PL2(pl2, val)    { (pl2).pmd = (val); }
+#else
+       pgd_t pl2, *pl2p = (pgd_t *)__pa(initial_page_table);
+#define SET_PL2(pl2, val)   { (pl2).pgd = (val); }
+#endif
+
+       ptep = (pte_t *)__pa(__brk_base);
+       pte.pte = PTE_IDENT_ATTR;
+
+       while ((pte.pte & PTE_PFN_MASK) < limit) {
+
+               SET_PL2(pl2, (unsigned long)ptep | PDE_IDENT_ATTR);
+               *pl2p = pl2;
+#ifndef CONFIG_X86_PAE
+               /* Kernel PDE entry */
+               *(pl2p +  ((PAGE_OFFSET >> PGDIR_SHIFT))) = pl2;
+#endif
+               for (i = 0; i < PTRS_PER_PTE; i++) {
+                       *ptep = pte;
+                       pte.pte += PAGE_SIZE;
+                       ptep++;
+               }
+
+               pl2p++;
+       }
+
+       ptr = (unsigned long *)__pa(&max_pfn_mapped);
+       /* Can't use pte_pfn() since it's a call with CONFIG_PARAVIRT */
+       *ptr = (pte.pte & PTE_PFN_MASK) >> PAGE_SHIFT;
+
+       ptr = (unsigned long *)__pa(&_brk_end);
+       *ptr = (unsigned long)ptep + PAGE_OFFSET;
+}
+
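
For a feel of what the mk_early_pgtbl_32() loop costs in the non-PAE
case: each iteration fills one PDE, maps 4 MiB via a fresh 4 KiB page of
PTEs carved from __brk_base, and stops once the mapping passes limit.
A small model, with an invented pa(_end) of 16 MiB and a 3G/1G split:

#include <stdio.h>

int main(void)
{
	const unsigned long page_size = 4096, ptrs_per_pte = 1024;
	unsigned long pa_end = 16ul << 20;	/* hypothetical pa(_end) */
	unsigned long lowmem_pages = 0x40000;	/* 1 GiB of lowmem */
	/* PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT, non-PAE flavour */
	unsigned long beyond = (lowmem_pages / 1024) * page_size;
	unsigned long limit = pa_end + beyond;	/* 17 MiB */
	unsigned long pte = 0, pdes = 0;

	while (pte < limit) {		/* one PDE per 4 MiB chunk */
		pte += ptrs_per_pte * page_size;
		pdes++;
	}

	/* Each iteration consumed one 4 KiB PTE page from __brk_base. */
	printf("PDEs: %lu, brk used: %lu KiB\n", pdes, pdes * page_size / 1024);
	return 0;
}

With these numbers the loop runs five times, so _brk_end advances by
20 KiB, matching what the old assembly loop would have computed.
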
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 4e8577d03372..1f85ee8f9439 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -24,6 +24,7 @@
 #include <asm/nops.h>
 #include <asm/bootparam.h>
 #include <asm/export.h>
+#include <asm/pgtable_32.h>
 
 /* Physical address */
 #define pa(X) ((X) - __PAGE_OFFSET)
@@ -41,44 +42,10 @@
 #define X86_CAPABILITY new_cpu_data+CPUINFO_x86_capability
 #define X86_VENDOR_ID  new_cpu_data+CPUINFO_x86_vendor_id
 
-/*
- * This is how much memory in addition to the memory covered up to
- * and including _end we need mapped initially.
- * We need:
- *     (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE)
- *     (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE)
- *
- * Modulo rounding, each megabyte assigned here requires a kilobyte of
- * memory, which is currently unreclaimed.
- *
- * This should be a multiple of a page.
- *
- * KERNEL_IMAGE_SIZE should be greater than pa(_end)
- * and small than max_low_pfn, otherwise will waste some page table entries
- */
-
-#if PTRS_PER_PMD > 1
-#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD)
-#else
-#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)
-#endif
 
 #define SIZEOF_PTREGS 17*4
 
 /*
- * Number of possible pages in the lowmem region.
- *
- * We shift 2 by 31 instead of 1 by 32 to the left in order to avoid a
- * gas warning about overflowing shift count when gas has been compiled
- * with only a host target support using a 32-bit type for internal
- * representation.
- */
-LOWMEM_PAGES = (((2<<31) - __PAGE_OFFSET) >> PAGE_SHIFT)
-
-/* Enough space to fit pagetables for the low memory linear map */
-MAPPING_BEYOND_END = PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT
-
-/*
  * Worst-case size of the kernel mapping we need to make:
  * a relocatable kernel can live anywhere in lowmem, so we need to be able
  * to map all of lowmem.
@@ -160,90 +127,15 @@ ENTRY(startup_32)
        call load_ucode_bsp
 #endif
 
-/*
- * Initialize page tables.  This creates a PDE and a set of page
- * tables, which are located immediately beyond __brk_base.  The variable
- * _brk_end is set up to point to the first "safe" location.
- * Mappings are created both at virtual address 0 (identity mapping)
- * and PAGE_OFFSET for up to _end.
- */
-#ifdef CONFIG_X86_PAE
-
-       /*
-        * In PAE mode initial_page_table is statically defined to contain
-        * enough entries to cover the VMSPLIT option (that is the top 1, 2 or 3
-        * entries). The identity mapping is handled by pointing two PGD entries
-        * to the first kernel PMD.
-        *
-        * Note the upper half of each PMD or PTE are always zero at this stage.
-        */
-
-#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */
-
-       xorl %ebx,%ebx                          /* %ebx is kept at zero */
-
-       movl $pa(__brk_base), %edi
-       movl $pa(initial_pg_pmd), %edx
-       movl $PTE_IDENT_ATTR, %eax
-10:
-       leal PDE_IDENT_ATTR(%edi),%ecx          /* Create PMD entry */
-       movl %ecx,(%edx)                        /* Store PMD entry */
-                                               /* Upper half already zero */
-       addl $8,%edx
-       movl $512,%ecx
-11:
-       stosl
-       xchgl %eax,%ebx
-       stosl
-       xchgl %eax,%ebx
-       addl $0x1000,%eax
-       loop 11b
-
-       /*
-        * End condition: we must map up to the end + MAPPING_BEYOND_END.
-        */
-       movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
-       cmpl %ebp,%eax
-       jb 10b
-1:
-       addl $__PAGE_OFFSET, %edi
-       movl %edi, pa(_brk_end)
-       shrl $12, %eax
-       movl %eax, pa(max_pfn_mapped)
+       /* Create early pagetables. */
+       call  mk_early_pgtbl_32
 
        /* Do early initialization of the fixmap area */
        movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
+#ifdef  CONFIG_X86_PAE
+#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */
        movl %eax,pa(initial_pg_pmd+0x1000*KPMDS-8)
-#else  /* Not PAE */
-
-page_pde_offset = (__PAGE_OFFSET >> 20);
-
-       movl $pa(__brk_base), %edi
-       movl $pa(initial_page_table), %edx
-       movl $PTE_IDENT_ATTR, %eax
-10:
-       leal PDE_IDENT_ATTR(%edi),%ecx          /* Create PDE entry */
-       movl %ecx,(%edx)                        /* Store identity PDE entry */
-       movl %ecx,page_pde_offset(%edx)         /* Store kernel PDE entry */
-       addl $4,%edx
-       movl $1024, %ecx
-11:
-       stosl
-       addl $0x1000,%eax
-       loop 11b
-       /*
-        * End condition: we must map up to the end + MAPPING_BEYOND_END.
-        */
-       movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
-       cmpl %ebp,%eax
-       jb 10b
-       addl $__PAGE_OFFSET, %edi
-       movl %edi, pa(_brk_end)
-       shrl $12, %eax
-       movl %eax, pa(max_pfn_mapped)
-
-       /* Do early initialization of the fixmap area */
-       movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
+#else
        movl %eax,pa(initial_page_table+0xffc)
 #endif
 
@@ -666,6 +558,7 @@ ENTRY(setup_once_ref)
 __PAGE_ALIGNED_BSS
        .align PAGE_SIZE
 #ifdef CONFIG_X86_PAE
+.globl initial_pg_pmd
 initial_pg_pmd:
        .fill 1024*KPMDS,4,0
 #else
