On Fri, Feb 20, 2015 at 6:49 PM, Baoquan He <b...@redhat.com> wrote:
> On 02/20/15 at 03:53pm, Yinghai Lu wrote:
> At the beginning I did it just as you said: added an IDT table and a #PF
> handler, got the page fault address, and built an identity mapping around it
> when reloading the kernel above 4G. In this case 3 more pages are enough if
> the kernel is put into another 512G region and crosses a 512G boundary.
> The kernel_ident_mapping_init code can be borrowed and needs to be adjusted a
> little bit. This works as expected, but a GPF is reported and the machine
> reboots to BIOS. That's why I made a simple debug patch, as I pasted before,
> to filter out unnecessary interference.

Please use the attached patch instead of the #PF handler in the boot stage.
It works when the output address is hard-coded to be moved above 4G.

From: Yinghai Lu <ying...@kernel.org>
Subject: [PATCH] x86, boot: Enable ident_mapping for kaslr above 4G for 64bit

Split kernel_ident_mapping_init() out into its own file and call it from the
boot/compressed/misc.c stage. It will cover the new kernel range that is above 4G.

Signed-off-by: Yinghai Lu <ying...@kernel.org>

---
 arch/x86/boot/compressed/misc.c     |   10 ++++
 arch/x86/boot/compressed/misc_pgt.c |   61 +++++++++++++++++++++++++++++
 arch/x86/include/asm/page.h         |    5 ++
 arch/x86/mm/ident_map.c             |   74 ++++++++++++++++++++++++++++++++++++
 arch/x86/mm/init_64.c               |   74 ------------------------------------
 5 files changed, 151 insertions(+), 73 deletions(-)

Index: linux-2.6/arch/x86/boot/compressed/misc.c
===================================================================
--- linux-2.6.orig/arch/x86/boot/compressed/misc.c
+++ linux-2.6/arch/x86/boot/compressed/misc.c
@@ -9,6 +9,11 @@
  * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
  */

+#ifdef CONFIG_X86_64
+#define __pa(x)  ((unsigned long)(x))
+#define __va(x)  ((void *)((unsigned long)(x)))
+#endif
+
 #include "misc.h"
 #include "../string.h"

@@ -366,6 +371,8 @@ static void parse_elf(void *output)
     free(phdrs);
 }

+#include "misc_pgt.c"
+
 asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap,
                   unsigned char *input_data,
                   unsigned long input_len,
@@ -421,6 +428,9 @@ asmlinkage __visible void *decompress_ke
         error("Wrong destination address");
 #endif

+    if (output != output_orig)
+        fill_linux64_pagetable((unsigned long)output, output_len);
+
     debug_putstr("\nDecompressing Linux... ");
     decompress(input_data, input_len, NULL, NULL, output, NULL, error);
     parse_elf(output);
Index: linux-2.6/arch/x86/include/asm/page.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/page.h
+++ linux-2.6/arch/x86/include/asm/page.h
@@ -37,7 +37,10 @@ static inline void copy_user_page(void *
     alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
 #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE

+#ifndef __pa
 #define __pa(x)        __phys_addr((unsigned long)(x))
+#endif
+
 #define __pa_nodebug(x)    __phys_addr_nodebug((unsigned long)(x))
 /* __pa_symbol should be used for C visible symbols.
    This seems to be the official gcc blessed way to do such arithmetic. */
@@ -51,7 +54,9 @@ static inline void copy_user_page(void *
 #define __pa_symbol(x) \
     __phys_addr_symbol(__phys_reloc_hide((unsigned long)(x)))

+#ifndef __va
 #define __va(x)            ((void *)((unsigned long)(x)+PAGE_OFFSET))
+#endif

 #define __boot_va(x)        __va(x)
 #define __boot_pa(x)        __pa(x)
Index: linux-2.6/arch/x86/boot/compressed/misc_pgt.c
===================================================================
--- /dev/null
+++ linux-2.6/arch/x86/boot/compressed/misc_pgt.c
@@ -0,0 +1,61 @@
+
+#ifdef CONFIG_X86_64
+#include <asm/init.h>
+#include <asm/pgtable.h>
+
+#include "../../mm/ident_map.h"
+
+struct alloc_pgt_data {
+    unsigned char *pgt_buf;
+    unsigned long pgt_buf_size;
+    unsigned long pgt_buf_offset;
+};
+
+static void *alloc_pgt_page(void *context)
+{
+    struct alloc_pgt_data *d = (struct alloc_pgt_data *)context;
+    unsigned char *p = (unsigned char *)d->pgt_buf;
+
+    if (d->pgt_buf_offset >= d->pgt_buf_size) {
+        debug_putstr("out of pgt_buf in misc.c\n");
+        return NULL;
+    }
+
+    p += d->pgt_buf_offset;
+    d->pgt_buf_offset += 4096;
+    memset(p, 0, 4096);
+
+    return p;
+}
+
+#define PGT_BUF_SIZE (4096*4)
+
+unsigned long __force_order;
+static unsigned char pgt_buf[PGT_BUF_SIZE] __aligned(4096);
+
+static void fill_linux64_pagetable(unsigned long start, unsigned long size)
+{
+    struct alloc_pgt_data data = {
+        .pgt_buf = (unsigned char *) pgt_buf,
+        .pgt_buf_size = sizeof(pgt_buf),
+        .pgt_buf_offset = 0,
+    };
+    struct x86_mapping_info mapping_info = {
+        .alloc_pgt_page = alloc_pgt_page,
+        .context = &data,
+        .pmd_flag = __PAGE_KERNEL_LARGE_EXEC,
+    };
+    unsigned long end = start + size;
+    pgd_t *level4p = (pgd_t *)read_cr3();
+
+    /* align boundary to 2M */
+    start = (start >> 21) << 21;
+    end = ((end + (1<<21) - 1) >> 21) << 21;
+    if (start >= (1UL<<32))
+        kernel_ident_mapping_init(&mapping_info, level4p, start, end);
+}
+#else
+static void fill_linux64_pagetable(unsigned long start, unsigned long size)
+{
+}
+#endif
Index: linux-2.6/arch/x86/mm/ident_map.c
===================================================================
--- /dev/null
+++ linux-2.6/arch/x86/mm/ident_map.c
@@ -0,0 +1,74 @@
+
+static void ident_pmd_init(unsigned long pmd_flag, pmd_t *pmd_page,
+               unsigned long addr, unsigned long end)
+{
+    addr &= PMD_MASK;
+    for (; addr < end; addr += PMD_SIZE) {
+        pmd_t *pmd = pmd_page + pmd_index(addr);
+
+        if (!pmd_present(*pmd))
+            set_pmd(pmd, __pmd(addr | pmd_flag));
+    }
+}
+static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
+              unsigned long addr, unsigned long end)
+{
+    unsigned long next;
+
+    for (; addr < end; addr = next) {
+        pud_t *pud = pud_page + pud_index(addr);
+        pmd_t *pmd;
+
+        next = (addr & PUD_MASK) + PUD_SIZE;
+        if (next > end)
+            next = end;
+
+        if (pud_present(*pud)) {
+            pmd = pmd_offset(pud, 0);
+            ident_pmd_init(info->pmd_flag, pmd, addr, next);
+            continue;
+        }
+        pmd = (pmd_t *)info->alloc_pgt_page(info->context);
+        if (!pmd)
+            return -ENOMEM;
+        ident_pmd_init(info->pmd_flag, pmd, addr, next);
+        set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
+    }
+
+    return 0;
+}
+
+int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
+                  unsigned long addr, unsigned long end)
+{
+    unsigned long next;
+    int result;
+    int off = info->kernel_mapping ? pgd_index(__PAGE_OFFSET) : 0;
+
+    for (; addr < end; addr = next) {
+        pgd_t *pgd = pgd_page + pgd_index(addr) + off;
+        pud_t *pud;
+
+        next = (addr & PGDIR_MASK) + PGDIR_SIZE;
+        if (next > end)
+            next = end;
+
+        if (pgd_present(*pgd)) {
+            pud = pud_offset(pgd, 0);
+            result = ident_pud_init(info, pud, addr, next);
+            if (result)
+                return result;
+            continue;
+        }
+
+        pud = (pud_t *)info->alloc_pgt_page(info->context);
+        if (!pud)
+            return -ENOMEM;
+        result = ident_pud_init(info, pud, addr, next);
+        if (result)
+            return result;
+        set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
+    }
+
+    return 0;
+}
Index: linux-2.6/arch/x86/mm/init_64.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/init_64.c
+++ linux-2.6/arch/x86/mm/init_64.c
@@ -56,79 +56,7 @@

 #include "mm_internal.h"

-static void ident_pmd_init(unsigned long pmd_flag, pmd_t *pmd_page,
-               unsigned long addr, unsigned long end)
-{
-    addr &= PMD_MASK;
-    for (; addr < end; addr += PMD_SIZE) {
-        pmd_t *pmd = pmd_page + pmd_index(addr);
-
-        if (!pmd_present(*pmd))
-            set_pmd(pmd, __pmd(addr | pmd_flag));
-    }
-}
-static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
-              unsigned long addr, unsigned long end)
-{
-    unsigned long next;
-
-    for (; addr < end; addr = next) {
-        pud_t *pud = pud_page + pud_index(addr);
-        pmd_t *pmd;
-
-        next = (addr & PUD_MASK) + PUD_SIZE;
-        if (next > end)
-            next = end;
-
-        if (pud_present(*pud)) {
-            pmd = pmd_offset(pud, 0);
-            ident_pmd_init(info->pmd_flag, pmd, addr, next);
-            continue;
-        }
-        pmd = (pmd_t *)info->alloc_pgt_page(info->context);
-        if (!pmd)
-            return -ENOMEM;
-        ident_pmd_init(info->pmd_flag, pmd, addr, next);
-        set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
-    }
-
-    return 0;
-}
-
-int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
-                  unsigned long addr, unsigned long end)
-{
-    unsigned long next;
-    int result;
-    int off = info->kernel_mapping ? pgd_index(__PAGE_OFFSET) : 0;
-
-    for (; addr < end; addr = next) {
-        pgd_t *pgd = pgd_page + pgd_index(addr) + off;
-        pud_t *pud;
-
-        next = (addr & PGDIR_MASK) + PGDIR_SIZE;
-        if (next > end)
-            next = end;
-
-        if (pgd_present(*pgd)) {
-            pud = pud_offset(pgd, 0);
-            result = ident_pud_init(info, pud, addr, next);
-            if (result)
-                return result;
-            continue;
-        }
-
-        pud = (pud_t *)info->alloc_pgt_page(info->context);
-        if (!pud)
-            return -ENOMEM;
-        result = ident_pud_init(info, pud, addr, next);
-        if (result)
-            return result;
-        set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
-    }
-
-    return 0;
-}
+#include "ident_map.c"

 static int __init parse_direct_gbpages_off(char *arg)
 {
From: Yinghai Lu <ying...@kernel.org>
Subject: [PATCH] x86, boot: Enable ident_mapping for kaslr above 4G for 64bit

Split kernel_ident_mapping_init() out into its own file and call it from the
boot/compressed/misc.c stage. It will cover the new kernel range that is above 4G.

Signed-off-by: Yinghai Lu <ying...@kernel.org>

---
 arch/x86/boot/compressed/misc.c     |   10 ++++
 arch/x86/boot/compressed/misc_pgt.c |   61 +++++++++++++++++++++++++++++
 arch/x86/include/asm/page.h         |    5 ++
 arch/x86/mm/ident_map.c             |   74 ++++++++++++++++++++++++++++++++++++
 arch/x86/mm/init_64.c               |   74 ------------------------------------
 5 files changed, 151 insertions(+), 73 deletions(-)

Index: linux-2.6/arch/x86/boot/compressed/misc.c
===================================================================
--- linux-2.6.orig/arch/x86/boot/compressed/misc.c
+++ linux-2.6/arch/x86/boot/compressed/misc.c
@@ -9,6 +9,11 @@
  * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
  */
 
+#ifdef CONFIG_X86_64
+#define __pa(x)  ((unsigned long)(x))
+#define __va(x)  ((void *)((unsigned long)(x)))
+#endif
+
 #include "misc.h"
 #include "../string.h"
 
@@ -366,6 +371,8 @@ static void parse_elf(void *output)
 	free(phdrs);
 }
 
+#include "misc_pgt.c"
+
 asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap,
 				  unsigned char *input_data,
 				  unsigned long input_len,
@@ -421,6 +428,9 @@ asmlinkage __visible void *decompress_ke
 		error("Wrong destination address");
 #endif
 
+	if (output != output_orig)
+		fill_linux64_pagetable((unsigned long)output, output_len);
+
 	debug_putstr("\nDecompressing Linux... ");
 	decompress(input_data, input_len, NULL, NULL, output, NULL, error);
 	parse_elf(output);
Index: linux-2.6/arch/x86/include/asm/page.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/page.h
+++ linux-2.6/arch/x86/include/asm/page.h
@@ -37,7 +37,10 @@ static inline void copy_user_page(void *
 	alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
 #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
 
+#ifndef __pa
 #define __pa(x)		__phys_addr((unsigned long)(x))
+#endif
+
 #define __pa_nodebug(x)	__phys_addr_nodebug((unsigned long)(x))
 /* __pa_symbol should be used for C visible symbols.
    This seems to be the official gcc blessed way to do such arithmetic. */
@@ -51,7 +54,9 @@ static inline void copy_user_page(void *
 #define __pa_symbol(x) \
 	__phys_addr_symbol(__phys_reloc_hide((unsigned long)(x)))
 
+#ifndef __va
 #define __va(x)			((void *)((unsigned long)(x)+PAGE_OFFSET))
+#endif
 
 #define __boot_va(x)		__va(x)
 #define __boot_pa(x)		__pa(x)
Index: linux-2.6/arch/x86/boot/compressed/misc_pgt.c
===================================================================
--- /dev/null
+++ linux-2.6/arch/x86/boot/compressed/misc_pgt.c
@@ -0,0 +1,61 @@
+
+#ifdef CONFIG_X86_64
+#include <asm/init.h>
+#include <asm/pgtable.h>
+
+#include "../../mm/ident_map.h"
+
+struct alloc_pgt_data {
+	unsigned char *pgt_buf;
+	unsigned long pgt_buf_size;
+	unsigned long pgt_buf_offset;
+};
+
+static void *alloc_pgt_page(void *context)
+{
+	struct alloc_pgt_data *d = (struct alloc_pgt_data *)context;
+	unsigned char *p = (unsigned char *)d->pgt_buf;
+
+	if (d->pgt_buf_offset >= d->pgt_buf_size) {
+		debug_putstr("out of pgt_buf in misc.c\n");
+		return NULL;
+	}
+
+	p += d->pgt_buf_offset;
+	d->pgt_buf_offset += 4096;
+	memset(p, 0, 4096);
+
+	return p;
+}
+
+#define PGT_BUF_SIZE (4096*4)
+
+unsigned long __force_order;
+static unsigned char pgt_buf[PGT_BUF_SIZE] __aligned(4096);
+
+static void fill_linux64_pagetable(unsigned long start, unsigned long size)
+{
+	struct alloc_pgt_data data = {
+		.pgt_buf = (unsigned char *) pgt_buf,
+		.pgt_buf_size = sizeof(pgt_buf),
+		.pgt_buf_offset = 0,
+	};
+	struct x86_mapping_info mapping_info = {
+		.alloc_pgt_page = alloc_pgt_page,
+		.context = &data,
+		.pmd_flag = __PAGE_KERNEL_LARGE_EXEC,
+	};
+	unsigned long end = start + size;
+	pgd_t *level4p = (pgd_t *)read_cr3();
+
+	/* align boundary to 2M */
+	start = (start >> 21) << 21;
+	end = ((end + (1<<21) - 1) >> 21) << 21;
+	if (start >= (1UL<<32))
+		kernel_ident_mapping_init(&mapping_info, level4p, start, end);
+}
+#else
+static void fill_linux64_pagetable(unsigned long start, unsigned long size)
+{
+}
+#endif
Index: linux-2.6/arch/x86/mm/ident_map.c
===================================================================
--- /dev/null
+++ linux-2.6/arch/x86/mm/ident_map.c
@@ -0,0 +1,74 @@
+
+static void ident_pmd_init(unsigned long pmd_flag, pmd_t *pmd_page,
+			   unsigned long addr, unsigned long end)
+{
+	addr &= PMD_MASK;
+	for (; addr < end; addr += PMD_SIZE) {
+		pmd_t *pmd = pmd_page + pmd_index(addr);
+
+		if (!pmd_present(*pmd))
+			set_pmd(pmd, __pmd(addr | pmd_flag));
+	}
+}
+static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
+			  unsigned long addr, unsigned long end)
+{
+	unsigned long next;
+
+	for (; addr < end; addr = next) {
+		pud_t *pud = pud_page + pud_index(addr);
+		pmd_t *pmd;
+
+		next = (addr & PUD_MASK) + PUD_SIZE;
+		if (next > end)
+			next = end;
+
+		if (pud_present(*pud)) {
+			pmd = pmd_offset(pud, 0);
+			ident_pmd_init(info->pmd_flag, pmd, addr, next);
+			continue;
+		}
+		pmd = (pmd_t *)info->alloc_pgt_page(info->context);
+		if (!pmd)
+			return -ENOMEM;
+		ident_pmd_init(info->pmd_flag, pmd, addr, next);
+		set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
+	}
+
+	return 0;
+}
+
+int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
+			      unsigned long addr, unsigned long end)
+{
+	unsigned long next;
+	int result;
+	int off = info->kernel_mapping ? pgd_index(__PAGE_OFFSET) : 0;
+
+	for (; addr < end; addr = next) {
+		pgd_t *pgd = pgd_page + pgd_index(addr) + off;
+		pud_t *pud;
+
+		next = (addr & PGDIR_MASK) + PGDIR_SIZE;
+		if (next > end)
+			next = end;
+
+		if (pgd_present(*pgd)) {
+			pud = pud_offset(pgd, 0);
+			result = ident_pud_init(info, pud, addr, next);
+			if (result)
+				return result;
+			continue;
+		}
+
+		pud = (pud_t *)info->alloc_pgt_page(info->context);
+		if (!pud)
+			return -ENOMEM;
+		result = ident_pud_init(info, pud, addr, next);
+		if (result)
+			return result;
+		set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
+	}
+
+	return 0;
+}
Index: linux-2.6/arch/x86/mm/init_64.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/init_64.c
+++ linux-2.6/arch/x86/mm/init_64.c
@@ -56,79 +56,7 @@
 
 #include "mm_internal.h"
 
-static void ident_pmd_init(unsigned long pmd_flag, pmd_t *pmd_page,
-			   unsigned long addr, unsigned long end)
-{
-	addr &= PMD_MASK;
-	for (; addr < end; addr += PMD_SIZE) {
-		pmd_t *pmd = pmd_page + pmd_index(addr);
-
-		if (!pmd_present(*pmd))
-			set_pmd(pmd, __pmd(addr | pmd_flag));
-	}
-}
-static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
-			  unsigned long addr, unsigned long end)
-{
-	unsigned long next;
-
-	for (; addr < end; addr = next) {
-		pud_t *pud = pud_page + pud_index(addr);
-		pmd_t *pmd;
-
-		next = (addr & PUD_MASK) + PUD_SIZE;
-		if (next > end)
-			next = end;
-
-		if (pud_present(*pud)) {
-			pmd = pmd_offset(pud, 0);
-			ident_pmd_init(info->pmd_flag, pmd, addr, next);
-			continue;
-		}
-		pmd = (pmd_t *)info->alloc_pgt_page(info->context);
-		if (!pmd)
-			return -ENOMEM;
-		ident_pmd_init(info->pmd_flag, pmd, addr, next);
-		set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
-	}
-
-	return 0;
-}
-
-int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
-			      unsigned long addr, unsigned long end)
-{
-	unsigned long next;
-	int result;
-	int off = info->kernel_mapping ? pgd_index(__PAGE_OFFSET) : 0;
-
-	for (; addr < end; addr = next) {
-		pgd_t *pgd = pgd_page + pgd_index(addr) + off;
-		pud_t *pud;
-
-		next = (addr & PGDIR_MASK) + PGDIR_SIZE;
-		if (next > end)
-			next = end;
-
-		if (pgd_present(*pgd)) {
-			pud = pud_offset(pgd, 0);
-			result = ident_pud_init(info, pud, addr, next);
-			if (result)
-				return result;
-			continue;
-		}
-
-		pud = (pud_t *)info->alloc_pgt_page(info->context);
-		if (!pud)
-			return -ENOMEM;
-		result = ident_pud_init(info, pud, addr, next);
-		if (result)
-			return result;
-		set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
-	}
-
-	return 0;
-}
+#include "ident_map.c"
 
 static int __init parse_direct_gbpages_off(char *arg)
 {

Reply via email to