Jeremy Fitzhardinge <[EMAIL PROTECTED]> writes: > When constructing the initial pagetable in pagetable_init, make sure > that non-PSE pmds are updated to PSE ones. This fixes a bug in the > paravirt pagetable init code, which otherwise tries to avoid overwrite > existing mappings. > > This moves the definition of pmd_huge() out of the hugetlbfs files > into pgtable.h. > > [ I know Eric would like to make larger changes to the way > pagetable init works, but this patch is the minimal fix to an > existing bug. ]
My preference would be for whoever had: paravirt_ops-hooks-to-set-up-initial-pagetable.patch queued to drop it until we can get a version that doesn't break early page table setup. Your partial fix still leaves the real page tables in a partially incorrect state, and even if we removed your changes from the PSE path we still can wind up not failing to set _PAGE_NX in the appropriate places on 4K pages. I have tried to be constructive and suggest how we can fix this cleanly. Short of that this is what I see needing to happen to fix the above patches changes to arch/i386/mm/init.c. Eric ... Subject: [PATCH] i386: During page table initialization always set the leaf page table entries. If we don't set the leaf page table entries it is quite possible that we will inherit and incorrect page table entry from the initial boot page table setup in head.S. So we need to redo the effort here. I don't know what to do about hypervisors like Xen that require their page tables to be read only, as our identity mapped page table entries currently violate that requirement. All I know if the kernel doesn't work properly on native hardware it is a bug. Signed-off-by: Eric W. Biederman <[EMAIL PROTECTED]> --- arch/i386/mm/init.c | 52 +++++++++++++++++++------------------------------- 1 files changed, 20 insertions(+), 32 deletions(-) diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index b77a43c..dbe16f6 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c @@ -63,18 +63,18 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd) pmd_t *pmd_table; #ifdef CONFIG_X86_PAE - pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); - - paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT); - set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); - pud = pud_offset(pgd, 0); - if (pmd_table != pmd_offset(pud, 0)) - BUG(); -#else + if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { + pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); + + paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT); + set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); + pud = pud_offset(pgd, 0); + if (pmd_table != pmd_offset(pud, 0)) + BUG(); + } +#endif pud = pud_offset(pgd, 0); pmd_table = pmd_offset(pud, 0); -#endif - return pmd_table; } @@ -84,7 +84,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd) */ static pte_t * __init one_page_table_init(pmd_t *pmd) { - if (pmd_none(*pmd)) { + if (!(pmd_val(*pmd) & _PAGE_PRESENT)) { pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); paravirt_alloc_pt(__pa(page_table) >> PAGE_SHIFT); @@ -109,7 +109,6 @@ static pte_t * __init one_page_table_init(pmd_t *pmd) static void __init page_table_range_init (unsigned long start, unsigned long end, pgd_t *pgd_base) { pgd_t *pgd; - pud_t *pud; pmd_t *pmd; int pgd_idx, pmd_idx; unsigned long vaddr; @@ -120,13 +119,10 @@ static void __init page_table_range_init (unsigned long start, unsigned long end pgd = pgd_base + pgd_idx; for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) { - if (!(pgd_val(*pgd) & _PAGE_PRESENT)) - one_md_table_init(pgd); - pud = pud_offset(pgd, vaddr); - pmd = pmd_offset(pud, vaddr); + pmd = one_md_table_init(pgd); + pmd = pmd + pmd_index(vaddr); for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) { - if (pmd_none(*pmd)) - one_page_table_init(pmd); + one_page_table_init(pmd); vaddr += PMD_SIZE; } @@ -159,11 +155,7 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) pfn = 0; for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) { - if (!(pgd_val(*pgd) & _PAGE_PRESENT)) - pmd = one_md_table_init(pgd); - else - pmd = pmd_offset(pud_offset(pgd, PAGE_OFFSET), PAGE_OFFSET); - + pmd = one_md_table_init(pgd); if (pfn >= max_low_pfn) continue; for (pmd_idx = 0; pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn; pmd++, pmd_idx++) { @@ -172,12 +164,11 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) /* Map with big pages if possible, otherwise create normal page tables. */ if (cpu_has_pse) { unsigned int address2 = (pfn + PTRS_PER_PTE - 1) * PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1; - if (!pmd_present(*pmd)) { - if (is_kernel_text(address) || is_kernel_text(address2)) - set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC)); - else - set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE)); - } + if (is_kernel_text(address) || is_kernel_text(address2)) + set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC)); + else + set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE)); + pfn += PTRS_PER_PTE; } else { pte = one_page_table_init(pmd); @@ -185,9 +176,6 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, pte_ofs++, address += PAGE_SIZE) { - if (pte_present(*pte)) - continue; - if (is_kernel_text(address)) set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC)); else -- 1.5.1.1.181.g2de0 - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/