On Wed, Feb 13, 2019 at 4:28 PM Kairui Song <kas...@redhat.com> wrote: > > On machines where the GART aperture is mapped over physical RAM, > /proc/kcore contains the GART aperture range and reading it may lead > to kernel panic. > > In 'commit 2a3e83c6f96c ("x86/gart: Exclude GART aperture from vmcore")', > a workaround is applied for vmcore to let /proc/vmcore return zeroes > when attempting to read the GART region, as vmcore have the same issue, > and after 'commit 707d4eefbdb3 ("Revert "[PATCH] Insert GART region > into resource map"")', userspace tools won't be able to detect GART > region so have to avoid it from being reading in kernel. > > This patch applies a similar workaround for kcore. Let /proc/kcore > return zeroes for GART aperture. > > Both vmcore and kcore maintain a memory mapping list, in the vmcore > workaround we exclude the GART region by registering a hook for checking > if PFN is valid before reading, because vmcore's memory mapping could > be generated by userspace which doesn't know about GART. But for kcore > it will be simpler to just alter the memory area list, kcore's area list > is always generated by kernel on init. > > Kcore's memory area list is generated very late so can't exclude the > overlapped area when GART is initialized, instead simply introduce a > new area enum type KCORE_NORAM, register GART aperture as KCORE_NORAM > and let kcore return zeros for all KCORE_NORAM area. This fixes the > problem well with minor code changes. > > --- > Update from V2: > Instead of repeating the same hook infrastructure for kcore, introduce > a new kcore area type to avoid reading from, and let kcore always bypass > this kind of area. > > Update from V1: > Fix a complie error when CONFIG_PROC_KCORE is not set > > arch/x86/kernel/aperture_64.c | 14 ++++++++++++++ > fs/proc/kcore.c | 13 +++++++++++++ > include/linux/kcore.h | 1 + > 3 files changed, 28 insertions(+) > > diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c > index 58176b56354e..5fb04bdd3221 100644 > --- a/arch/x86/kernel/aperture_64.c > +++ b/arch/x86/kernel/aperture_64.c > @@ -31,6 +31,7 @@ > #include <asm/amd_nb.h> > #include <asm/x86_init.h> > #include <linux/crash_dump.h> > +#include <linux/kcore.h> > > /* > * Using 512M as goal, in case kexec will load kernel_big > @@ -84,6 +85,17 @@ static void exclude_from_vmcore(u64 aper_base, u32 > aper_order) > } > #endif > > +#ifdef CONFIG_PROC_KCORE > +static struct kcore_list kcore_gart; > + > +static void __init exclude_from_kcore(u64 aper_base, u32 aper_order) { > + u32 aper_size = (32 * 1024 * 1024) << aper_order; > + kclist_add(&kcore_gart, __va(aper_base), aper_size, KCORE_NORAM); > +} > +#else > +static inline void __init exclude_from_kcore(u64 aper_base, u32 aper_order) > { } > +#endif > + > /* This code runs before the PCI subsystem is initialized, so just > access the northbridge directly. */ > > @@ -475,6 +487,7 @@ int __init gart_iommu_hole_init(void) > * and fixed up the northbridge > */ > exclude_from_vmcore(last_aper_base, last_aper_order); > + exclude_from_kcore(last_aper_base, last_aper_order); > > return 1; > } > @@ -521,6 +534,7 @@ int __init gart_iommu_hole_init(void) > * range through vmcore even though it should be part of the dump. > */ > exclude_from_vmcore(aper_alloc, aper_order); > + exclude_from_kcore(aper_alloc, aper_order); > > /* Fix up the north bridges */ > for (i = 0; i < amd_nb_bus_dev_ranges[i].dev_limit; i++) { > diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c > index bbcc185062bb..15e0d74d7c56 100644 > --- a/fs/proc/kcore.c > +++ b/fs/proc/kcore.c > @@ -75,6 +75,8 @@ static size_t get_kcore_size(int *nphdr, size_t *phdrs_len, > size_t *notes_len, > size = 0; > > list_for_each_entry(m, &kclist_head, list) { > + if (m->type == KCORE_NORAM) > + continue; > try = kc_vaddr_to_offset((size_t)m->addr + m->size); > if (try > size) > size = try; > @@ -256,6 +258,9 @@ static int kcore_update_ram(void) > list_for_each_entry_safe(pos, tmp, &kclist_head, list) { > if (pos->type == KCORE_RAM || pos->type == KCORE_VMEMMAP) > list_move(&pos->list, &garbage); > + /* Move NORAM area to head of the new list */ > + if (pos->type == KCORE_NORAM) > + list_move(&pos->list, &list); > } > list_splice_tail(&list, &kclist_head); > > @@ -356,6 +361,8 @@ read_kcore(struct file *file, char __user *buffer, size_t > buflen, loff_t *fpos) > > phdr = &phdrs[1]; > list_for_each_entry(m, &kclist_head, list) { > + if (m->type == KCORE_NORAM) > + continue; > phdr->p_type = PT_LOAD; > phdr->p_flags = PF_R | PF_W | PF_X; > phdr->p_offset = kc_vaddr_to_offset(m->addr) + > data_offset; > @@ -465,6 +472,12 @@ read_kcore(struct file *file, char __user *buffer, > size_t buflen, loff_t *fpos) > goto out; > } > m = NULL; /* skip the list anchor */ > + } else if (m->type == KCORE_NORAM) { > + /* for NORAM area just fill zero */ > + if (clear_user(buffer, tsz)) { > + ret = -EFAULT; > + goto out; > + } > } else if (m->type == KCORE_VMALLOC) { > vread(buf, (char *)start, tsz); > /* we have to zero-fill user buffer even if no read */ > diff --git a/include/linux/kcore.h b/include/linux/kcore.h > index 8c3f8c14eeaa..372a4093f794 100644 > --- a/include/linux/kcore.h > +++ b/include/linux/kcore.h > @@ -13,6 +13,7 @@ enum kcore_type { > KCORE_USER, > KCORE_OTHER, > KCORE_REMAP, > + KCORE_NORAM, > }; > > struct kcore_list { > -- > 2.20.1 >
Hi Jiri, Could you help have a look of this fix too? I saw your reviewed V1, and this V3 changed the approach a lot. Thanks! -- Best Regards, Kairui Song