On 06/24/19 at 03:31pm, Kirill A. Shutemov wrote: > Kyle has reported that kernel crashes sometimes when it boots in > 5-level paging mode with KASLR enabled: > > WARNING: CPU: 0 PID: 0 at arch/x86/mm/init_64.c:87 phys_p4d_init+0x1d4/0x1ea > RIP: 0010:phys_p4d_init+0x1d4/0x1ea > Call Trace: > __kernel_physical_mapping_init+0x10a/0x35c > kernel_physical_mapping_init+0xe/0x10 > init_memory_mapping+0x1aa/0x3b0 > init_range_memory_mapping+0xc8/0x116 > init_mem_mapping+0x225/0x2eb > setup_arch+0x6ff/0xcf5 > start_kernel+0x64/0x53b > ? copy_bootdata+0x1f/0xce > x86_64_start_reservations+0x24/0x26 > x86_64_start_kernel+0x8a/0x8d > secondary_startup_64+0xb6/0xc0 > > which causes later: > > BUG: unable to handle page fault for address: ff484d019580eff8 > #PF: supervisor read access in kernel mode > #PF: error_code(0x0000) - not-present page > BAD > Oops: 0000 [#1] SMP NOPTI > RIP: 0010:fill_pud+0x13/0x130 > Call Trace: > set_pte_vaddr_p4d+0x2e/0x50 > set_pte_vaddr+0x6f/0xb0 > __native_set_fixmap+0x28/0x40 > native_set_fixmap+0x39/0x70 > register_lapic_address+0x49/0xb6 > early_acpi_boot_init+0xa5/0xde > setup_arch+0x944/0xcf5 > start_kernel+0x64/0x53b > > Kyle bisected the issue to commit b569c1843498 ("x86/mm/KASLR: Reduce > randomization granularity for 5-level paging to 1GB") > > Before the commit PAGE_OFFSET is always aligned to P4D_SIZE if we boot in > 5-level paging mode. But now only PUD_SIZE alignment is guaranteed. > > For phys_p4d_init() it means that 'paddr_next' after the first iteration > can belong to the same p4d entry. > > In the case I was able to reproduce the 'vaddr' on the first iteration > is 0xff4228027fe00000 and 'paddr' is 0x33fe00000. On the second > iteration 'vaddr' becomes 0xff42287f40000000 and 'paddr' is > 0x8000000000. The 'vaddr' in both cases belong to the same p4d entry. > > It screws up 'i' count: we miss the last entry in the page table > completely. And it confuses the branch under 'paddr >= paddr_end' > condition: the p4d entry can be cleared where must not to. > > The patch makes phys_p4d_init() walk by virtual address space, like > __kernel_physical_mapping_init() does. It makes it work correctly with > phys-virt mismatch. > > Signed-off-by: Kirill A. Shutemov <kirill.shute...@linux.intel.com> > Reported-and-tested-by: Kyle Pelton <kyle.d.pel...@intel.com> > Fixes: b569c1843498 ("x86/mm/KASLR: Reduce randomization granularity for > 5-level paging to 1GB") > Cc: Baoquan He <b...@redhat.com> > Signed-off-by: Kirill A. Shutemov <kirill.shute...@linux.intel.com> > --- > arch/x86/mm/init_64.c | 24 +++++++++++++----------- > 1 file changed, 13 insertions(+), 11 deletions(-)
Looks good to me, thanks. Acked-by: Baoquan He <b...@redhat.com> > > diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c > index 693aaf28d5fe..0f01c7b1d217 100644 > --- a/arch/x86/mm/init_64.c > +++ b/arch/x86/mm/init_64.c > @@ -671,23 +671,25 @@ static unsigned long __meminit > phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end, > unsigned long page_size_mask, bool init) > { > - unsigned long paddr_next, paddr_last = paddr_end; > - unsigned long vaddr = (unsigned long)__va(paddr); > - int i = p4d_index(vaddr); > + unsigned long vaddr, vaddr_end, vaddr_next, paddr_next, paddr_last; > + > + paddr_last = paddr_end; > + vaddr = (unsigned long)__va(paddr); > + vaddr_end = (unsigned long)__va(paddr_end); > > if (!pgtable_l5_enabled()) > return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end, > page_size_mask, init); > > - for (; i < PTRS_PER_P4D; i++, paddr = paddr_next) { > - p4d_t *p4d; > + for (; vaddr < vaddr_end; vaddr = vaddr_next) { > + p4d_t *p4d = p4d_page + p4d_index(vaddr); > pud_t *pud; > > - vaddr = (unsigned long)__va(paddr); > - p4d = p4d_page + p4d_index(vaddr); > - paddr_next = (paddr & P4D_MASK) + P4D_SIZE; > + vaddr_next = (vaddr & P4D_MASK) + P4D_SIZE; > + paddr = __pa(vaddr); > > if (paddr >= paddr_end) { > + paddr_next = __pa(vaddr_next); > if (!after_bootmem && > !e820__mapped_any(paddr & P4D_MASK, paddr_next, > E820_TYPE_RAM) && > @@ -699,13 +701,13 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, > unsigned long paddr_end, > > if (!p4d_none(*p4d)) { > pud = pud_offset(p4d, 0); > - paddr_last = phys_pud_init(pud, paddr, paddr_end, > - page_size_mask, init); > + paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end), > + page_size_mask, init); > continue; > } > > pud = alloc_low_page(); > - paddr_last = phys_pud_init(pud, paddr, paddr_end, > + paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end), > page_size_mask, init); > > spin_lock(&init_mm.page_table_lock); > -- > 2.21.0 >