On Mon, 19 Mar 2001, Linus Torvalds wrote: > The complete changelog is appended, but the biggest recent change is > the mmap_sem change, which I updated with new locking rules for > pte/pmd_alloc to avoid the race on the actual page table build. > > This has only been tested on i386 without PAE, and is known to break > other architectures. Ingo, mind checking what PAE needs? [...] One nontrivial issue was that on PAE the pgd has to be installed with 'present' pgd entries, due to a CPU erratum. This means that the pgd_present() code in mm/memory.c, while theoretically correct, doesn't work with PAE. An equivalent solution is to use !pgd_none(), which also works with the PAE workaround. PAE mode could re-define pgd_present() to filter out the workaround - do you prefer this to the !pgd_none() solution? The rest was pretty straightforward. In any case, with the attached pae-2.4.3-A4 patch (against 2.4.3-pre7, applies to 2.4.2-ac24 cleanly as well) applied, 2.4.3-pre7 boots & works just fine on PAE 64GB-HIGHMEM and non-PAE kernels. - the patch also does another cleanup: it removes various bad_pagetable code snippets all around the x86 tree, which are not needed anymore. This saves 8 KB RAM on x86 systems. - removed the last remaining *_kernel() macro. - fixed a minor clear_page() bug in pgalloc.h: the page was cleared without checking the allocation result, and gfp() could fail in the future. Ingo
--- linux/mm/memory.c.orig Sun Mar 25 18:55:05 2001 +++ linux/mm/memory.c Sun Mar 25 18:55:07 2001 @@ -1295,7 +1295,7 @@ * Because we dropped the lock, we should re-check the * entry, as somebody else could have populated it.. */ - if (pgd_present(*pgd)) { + if (!pgd_none(*pgd)) { pmd_free(new); goto out; } @@ -1313,7 +1313,7 @@ */ pte_t *pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address) { - if (!pmd_present(*pmd)) { + if (pmd_none(*pmd)) { pte_t *new; /* "fast" allocation can happen without dropping the lock.. */ @@ -1329,7 +1329,7 @@ * Because we dropped the lock, we should re-check the * entry, as somebody else could have populated it.. */ - if (pmd_present(*pmd)) { + if (!pmd_none(*pmd)) { pte_free(new); goto out; } --- linux/include/asm-i386/pgalloc-3level.h.orig Sun Mar 25 18:55:05 2001 +++ linux/include/asm-i386/pgalloc-3level.h Sun Mar 25 19:23:02 2001 @@ -21,12 +21,12 @@ { unsigned long *ret; - if ((ret = pmd_quicklist) != NULL) { + ret = pmd_quicklist; + if (ret != NULL) { pmd_quicklist = (unsigned long *)(*ret); ret[0] = 0; pgtable_cache_size--; - } else - ret = (unsigned long *)get_pmd_slow(); + } return (pmd_t *)ret; } @@ -41,5 +41,10 @@ { free_page((unsigned long)pmd); } + +#define pmd_free(pmd) pmd_free_fast(pmd) + +#define pgd_populate(pgd, pmd) \ + do { set_pgd(pgd, __pgd(1 + __pa(pmd))); __flush_tlb(); } while(0) #endif /* _I386_PGALLOC_3LEVEL_H */ --- linux/include/asm-i386/pgalloc.h.orig Sun Mar 25 18:56:25 2001 +++ linux/include/asm-i386/pgalloc.h Sun Mar 25 19:35:59 2001 @@ -11,7 +11,8 @@ #define pte_quicklist (current_cpu_data.pte_quick) #define pgtable_cache_size (current_cpu_data.pgtable_cache_sz) -#define pmd_populate(pmd, pte) set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))) +#define pmd_populate(pmd, pte) \ + set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))) #if CONFIG_X86_PAE # include <asm/pgalloc-3level.h> @@ -72,7 +73,8 @@ pte_t *pte; pte = (pte_t *) __get_free_page(GFP_KERNEL); - clear_page(pte); + if (pte) + 
clear_page(pte); return pte; } @@ -100,7 +102,6 @@ free_page((unsigned long)pte); } -#define pte_free_kernel(pte) pte_free_slow(pte) #define pte_free(pte) pte_free_slow(pte) #define pgd_free(pgd) free_pgd_slow(pgd) #define pgd_alloc() get_pgd_fast() --- linux/include/asm-i386/pgtable.h.orig Sun Mar 25 19:03:58 2001 +++ linux/include/asm-i386/pgtable.h Sun Mar 25 19:04:06 2001 @@ -243,12 +243,6 @@ /* page table for 0-4MB for everybody */ extern unsigned long pg0[1024]; -/* - * Handling allocation failures during page table setup. - */ -extern void __handle_bad_pmd(pmd_t * pmd); -extern void __handle_bad_pmd_kernel(pmd_t * pmd); - #define pte_present(x) ((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE)) #define pte_clear(xp) do { set_pte(xp, __pte(0)); } while (0) --- linux/arch/i386/mm/init.c.orig Sun Mar 25 19:02:05 2001 +++ linux/arch/i386/mm/init.c Sun Mar 25 19:02:42 2001 @@ -40,77 +40,6 @@ static unsigned long totalram_pages; static unsigned long totalhigh_pages; -/* - * BAD_PAGE is the page that is used for page faults when linux - * is out-of-memory. Older versions of linux just did a - * do_exit(), but using this instead means there is less risk - * for a process dying in kernel mode, possibly leaving an inode - * unused etc.. - * - * BAD_PAGETABLE is the accompanying page-table: it is initialized - * to point to BAD_PAGE entries. - * - * ZERO_PAGE is a special page that is used for zero-initialized - * data and COW. - */ - -/* - * These are allocated in head.S so that we get proper page alignment. - * If you change the size of these then change head.S as well. - */ -extern char empty_bad_page[PAGE_SIZE]; -#if CONFIG_X86_PAE -extern pmd_t empty_bad_pmd_table[PTRS_PER_PMD]; -#endif -extern pte_t empty_bad_pte_table[PTRS_PER_PTE]; - -/* - * We init them before every return and make them writable-shared. - * This guarantees we get out of the kernel in some more or less sane - * way. 
- */ -#if CONFIG_X86_PAE -static pmd_t * get_bad_pmd_table(void) -{ - pmd_t v; - int i; - - set_pmd(&v, __pmd(_PAGE_TABLE + __pa(empty_bad_pte_table))); - - for (i = 0; i < PAGE_SIZE/sizeof(pmd_t); i++) - empty_bad_pmd_table[i] = v; - - return empty_bad_pmd_table; -} -#endif - -static pte_t * get_bad_pte_table(void) -{ - pte_t v; - int i; - - v = pte_mkdirty(mk_pte_phys(__pa(empty_bad_page), PAGE_SHARED)); - - for (i = 0; i < PAGE_SIZE/sizeof(pte_t); i++) - empty_bad_pte_table[i] = v; - - return empty_bad_pte_table; -} - - - -void __handle_bad_pmd(pmd_t *pmd) -{ - pmd_ERROR(*pmd); - set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(get_bad_pte_table()))); -} - -void __handle_bad_pmd_kernel(pmd_t *pmd) -{ - pmd_ERROR(*pmd); - set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(get_bad_pte_table()))); -} - int do_check_pgt_cache(int low, int high) { int freed = 0; --- linux/arch/i386/kernel/head.S.orig Sun Mar 25 19:04:23 2001 +++ linux/arch/i386/kernel/head.S Sun Mar 25 19:04:57 2001 @@ -415,23 +415,6 @@ ENTRY(empty_zero_page) .org 0x5000 -ENTRY(empty_bad_page) - -.org 0x6000 -ENTRY(empty_bad_pte_table) - -#if CONFIG_X86_PAE - - .org 0x7000 - ENTRY(empty_bad_pmd_table) - - .org 0x8000 - -#else - - .org 0x7000 - -#endif /* * This starts the data section. Note that the above is all --- linux/MAINTAINERS.orig Sun Mar 25 19:23:13 2001 +++ linux/MAINTAINERS Sun Mar 25 19:24:46 2001 @@ -1454,6 +1454,11 @@ L: [EMAIL PROTECTED] S: Maintained +X86 3-LEVEL PAGING (PAE) SUPPORT +P: Ingo Molnar +M: [EMAIL PROTECTED] +S: Maintained + Z85230 SYNCHRONOUS DRIVER P: Alan Cox M: [EMAIL PROTECTED]