On 03/26/2020 11:37 PM, James Morse wrote:
> An image loaded for kexec is not stored in place, instead its segments
> are scattered through memory, and are re-assembled when needed. In the
> meantime, the target memory may have been removed.
>
> Because mm is not aware that this memory is still in use, it allows it
> to be removed.
Why the isolation process does not fail when these pages are currently
being used by kexec ?
>
> Add a memory notifier to prevent the removal of memory regions that
> overlap with a loaded kexec image segment. e.g., when triggered from the
> Qemu console:
> | kexec_core: memory region in use
> | memory memory32: Offline failed.
Yes this is definitely an added protection for these kexec loaded kernels
memory areas from being offlined but I would have expected the preceding
offlining to have failed as well.
>
> Signed-off-by: James Morse <james.mo...@arm.com>
> ---
> kernel/kexec_core.c | 56 +++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 56 insertions(+)
>
> diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
> index c19c0dad1ebe..ba1d91e868ca 100644
> --- a/kernel/kexec_core.c
> +++ b/kernel/kexec_core.c
> @@ -12,6 +12,7 @@
> #include <linux/slab.h>
> #include <linux/fs.h>
> #include <linux/kexec.h>
> +#include <linux/memory.h>
> #include <linux/mutex.h>
> #include <linux/list.h>
> #include <linux/highmem.h>
> @@ -22,10 +23,12 @@
> #include <linux/elf.h>
> #include <linux/elfcore.h>
> #include <linux/utsname.h>
> +#include <linux/notifier.h>
> #include <linux/numa.h>
> #include <linux/suspend.h>
> #include <linux/device.h>
> #include <linux/freezer.h>
> +#include <linux/pfn.h>
> #include <linux/pm.h>
> #include <linux/cpu.h>
> #include <linux/uaccess.h>
> @@ -1219,3 +1222,56 @@ void __weak arch_kexec_protect_crashkres(void)
>
> void __weak arch_kexec_unprotect_crashkres(void)
> {}
> +
> +/*
> + * If user-space wants to offline memory that is in use by a loaded kexec
> + * image, it should unload the image first.
> + */
Probably this would need kexec user manual and related system call man pages
update as well.
> +static int mem_remove_cb(struct notifier_block *nb, unsigned long action,
> + void *data)
> +{
> + int rv = NOTIFY_OK, i;
> + struct memory_notify *arg = data;
> + unsigned long pfn = arg->start_pfn;
> + unsigned long nr_segments, sstart, send;
> + unsigned long end_pfn = arg->start_pfn + arg->nr_pages;
> +
> + might_sleep();
Required ?
> +
> + if (action != MEM_GOING_OFFLINE)
> + return NOTIFY_DONE;
> +
> + mutex_lock(&kexec_mutex);
> + if (kexec_image) {
> + nr_segments = kexec_image->nr_segments;
> +
> + for (i = 0; i < nr_segments; i++) {
> + sstart = PFN_DOWN(kexec_image->segment[i].mem);
> + send = PFN_UP(kexec_image->segment[i].mem +
> + kexec_image->segment[i].memsz);
> +
> + if ((pfn <= sstart && sstart < end_pfn) ||
> + (pfn <= send && send < end_pfn)) {
> + pr_warn("Memory region in use\n");
> + rv = NOTIFY_BAD;
> + break;
> + }
> + }
> + }
> + mutex_unlock(&kexec_mutex);
> +
> + return rv;
Variable 'rv' is redundant, should use NOTIFY_[BAD|OK] directly instead.
> +}
> +
> +static struct notifier_block mem_remove_nb = {
> + .notifier_call = mem_remove_cb,
> +};
> +
> +static int __init register_mem_remove_cb(void)
> +{
> + if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG))
Should not all these new code here be wrapped with CONFIG_MEMORY_HOTREMOVE
to reduce the scope as well as final code size when the config is disabled.
> + return register_memory_notifier(&mem_remove_nb);
> +
> + return 0;
> +}
> +device_initcall(register_mem_remove_cb);
>
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec