Re: [PATCH 1/2] zboot: enable arm64 kexec_load for zboot image
Hi, On Wed, 13 Sept 2023 at 08:54, Pingfan Liu wrote: > > On Mon, Sep 11, 2023 at 6:37 PM Dave Young wrote: > > > > kexec_file_load support of zboot kernel image decompressed the vmlinuz, > > so in kexec_load code just load the kernel with reading the decompressed > > kernel fd into a new buffer and use it directly. > > > > Signed-off-by: Dave Young > > --- > > include/kexec-pe-zboot.h | 3 ++- > > kexec/arch/arm64/kexec-vmlinuz-arm64.c | 20 ++-- > > kexec/kexec-pe-zboot.c | 4 +++- > > kexec/kexec.c | 2 +- > > kexec/kexec.h | 1 + > > 5 files changed, 25 insertions(+), 5 deletions(-) > > > > diff --git a/include/kexec-pe-zboot.h b/include/kexec-pe-zboot.h > > index e2e0448a81f2..374916cbe883 100644 > > --- a/include/kexec-pe-zboot.h > > +++ b/include/kexec-pe-zboot.h > > @@ -11,5 +11,6 @@ struct linux_pe_zboot_header { > > uint32_t compress_type; > > }; > > > > -int pez_prepare(const char *crude_buf, off_t buf_sz, int *kernel_fd); > > +int pez_prepare(const char *crude_buf, off_t buf_sz, int *kernel_fd, > > + off_t *kernel_size); > > #endif > > diff --git a/kexec/arch/arm64/kexec-vmlinuz-arm64.c > > b/kexec/arch/arm64/kexec-vmlinuz-arm64.c > > index c0ee47c8f50a..8f378d8fa6d0 100644 > > --- a/kexec/arch/arm64/kexec-vmlinuz-arm64.c > > +++ b/kexec/arch/arm64/kexec-vmlinuz-arm64.c > > @@ -34,6 +34,7 @@ > > #include "arch/options.h" > > > > static int kernel_fd = -1; > > +static off_t decompressed_size; > > > > /* Returns: > > * -1 : in case of error/invalid format (not a valid PE+compressed ZBOOT > > format. 
> > @@ -72,7 +73,7 @@ int pez_arm64_probe(const char *kernel_buf, off_t > > kernel_size) > > return -1; > > } > > > > - ret = pez_prepare(buf, buf_sz, &kernel_fd); > > + ret = pez_prepare(buf, buf_sz, &kernel_fd, &decompressed_size); > > > > if (!ret) { > > /* validate the arm64 specific header */ > > @@ -98,8 +99,23 @@ bad_header: > > int pez_arm64_load(int argc, char **argv, const char *buf, off_t len, > > struct kexec_info *info) > > { > > + char *kbuf; > > + > > info->kernel_fd = kernel_fd; > > - return image_arm64_load(argc, argv, buf, len, info); > > + if (kernel_fd > 0 && decompressed_size > 0) { > > + off_t nread; > > + > > + kbuf = slurp_fd(kernel_fd, NULL, decompressed_size, &nread); > > + if (!kbuf || nread != decompressed_size) { Today in another test I found that this breaks the kexec_file_load because the slurp_fd() closed the kernel_fd after readding out the buffer, I will send another version soon, also cleanup a bit about this function. Thanks for reviewing. > > + dbgprintf("%s: failed.\n", __func__); > > + return -1; > > + } > > + } else { > > + dbgprintf("%s: wrong file descriptor.\n", __func__); > > + return -1; > > + } > > + > > + return image_arm64_load(argc, argv, kbuf, decompressed_size, info); > > } > > > > void pez_arm64_usage(void) > > diff --git a/kexec/kexec-pe-zboot.c b/kexec/kexec-pe-zboot.c > > index 2f2e052b76c5..3abd17d9fe59 100644 > > --- a/kexec/kexec-pe-zboot.c > > +++ b/kexec/kexec-pe-zboot.c > > @@ -37,7 +37,8 @@ > > * > > * crude_buf: the content, which is read from the kernel file without any > > processing > > */ > > -int pez_prepare(const char *crude_buf, off_t buf_sz, int *kernel_fd) > > +int pez_prepare(const char *crude_buf, off_t buf_sz, int *kernel_fd, > > + off_t *kernel_size) > > { > > int ret = -1; > > int fd = 0; > > @@ -110,6 +111,7 @@ int pez_prepare(const char *crude_buf, off_t buf_sz, > > int *kernel_fd) > > goto fail_bad_header; > > } > > > > + *kernel_size = decompressed_size; > > dbgprintf("%s: done\n", 
__func__); > > > > ret = 0; > > diff --git a/kexec/kexec.c b/kexec/kexec.c > > index c3b182e254e0..1edbd349c86d 100644 > > --- a/kexec/kexec.c > > +++ b/kexec/kexec.c > > @@ -489,7 +489,7 @@ static int add_backup_segments(struct kexec_info *info, > > return 0; > > } > > > > -static char *slurp_fd(int fd, const char *filename, off_t size, off_t > > *nread) > > +char *slurp_fd(int fd, const char *filename, off_t size, off_t *nread) > > { > > char *buf; > > off_t progress; > > diff --git a/kexec/kexec.h b/kexec/kexec.h > > index ed3b499a80f2..093338969c57 100644 > > --- a/kexec/kexec.h > > +++ b/kexec/kexec.h > > @@ -267,6 +267,7 @@ extern void die(const char *fmt, ...) > > __attribute__ ((format (printf, 1, 2))); > > extern void *xmalloc(size_t size); > > extern void *xrealloc(void *ptr, size_t size); > > +extern char *slurp_fd(int fd, const char *filename, off_t size, off_t > > *nread); > > extern char *slurp_file(const char *filename
Re: [PATCH 1/2] zboot: enable arm64 kexec_load for zboot image
On Mon, Sep 11, 2023 at 6:37 PM Dave Young wrote: > > kexec_file_load support of zboot kernel image decompressed the vmlinuz, > so in kexec_load code just load the kernel with reading the decompressed > kernel fd into a new buffer and use it directly. > > Signed-off-by: Dave Young > --- > include/kexec-pe-zboot.h | 3 ++- > kexec/arch/arm64/kexec-vmlinuz-arm64.c | 20 ++-- > kexec/kexec-pe-zboot.c | 4 +++- > kexec/kexec.c | 2 +- > kexec/kexec.h | 1 + > 5 files changed, 25 insertions(+), 5 deletions(-) > > diff --git a/include/kexec-pe-zboot.h b/include/kexec-pe-zboot.h > index e2e0448a81f2..374916cbe883 100644 > --- a/include/kexec-pe-zboot.h > +++ b/include/kexec-pe-zboot.h > @@ -11,5 +11,6 @@ struct linux_pe_zboot_header { > uint32_t compress_type; > }; > > -int pez_prepare(const char *crude_buf, off_t buf_sz, int *kernel_fd); > +int pez_prepare(const char *crude_buf, off_t buf_sz, int *kernel_fd, > + off_t *kernel_size); > #endif > diff --git a/kexec/arch/arm64/kexec-vmlinuz-arm64.c > b/kexec/arch/arm64/kexec-vmlinuz-arm64.c > index c0ee47c8f50a..8f378d8fa6d0 100644 > --- a/kexec/arch/arm64/kexec-vmlinuz-arm64.c > +++ b/kexec/arch/arm64/kexec-vmlinuz-arm64.c > @@ -34,6 +34,7 @@ > #include "arch/options.h" > > static int kernel_fd = -1; > +static off_t decompressed_size; > > /* Returns: > * -1 : in case of error/invalid format (not a valid PE+compressed ZBOOT > format. 
> @@ -72,7 +73,7 @@ int pez_arm64_probe(const char *kernel_buf, off_t > kernel_size) > return -1; > } > > - ret = pez_prepare(buf, buf_sz, &kernel_fd); > + ret = pez_prepare(buf, buf_sz, &kernel_fd, &decompressed_size); > > if (!ret) { > /* validate the arm64 specific header */ > @@ -98,8 +99,23 @@ bad_header: > int pez_arm64_load(int argc, char **argv, const char *buf, off_t len, > struct kexec_info *info) > { > + char *kbuf; > + > info->kernel_fd = kernel_fd; > - return image_arm64_load(argc, argv, buf, len, info); > + if (kernel_fd > 0 && decompressed_size > 0) { > + off_t nread; > + > + kbuf = slurp_fd(kernel_fd, NULL, decompressed_size, &nread); > + if (!kbuf || nread != decompressed_size) { > + dbgprintf("%s: failed.\n", __func__); > + return -1; > + } > + } else { > + dbgprintf("%s: wrong file descriptor.\n", __func__); > + return -1; > + } > + > + return image_arm64_load(argc, argv, kbuf, decompressed_size, info); > } > > void pez_arm64_usage(void) > diff --git a/kexec/kexec-pe-zboot.c b/kexec/kexec-pe-zboot.c > index 2f2e052b76c5..3abd17d9fe59 100644 > --- a/kexec/kexec-pe-zboot.c > +++ b/kexec/kexec-pe-zboot.c > @@ -37,7 +37,8 @@ > * > * crude_buf: the content, which is read from the kernel file without any > processing > */ > -int pez_prepare(const char *crude_buf, off_t buf_sz, int *kernel_fd) > +int pez_prepare(const char *crude_buf, off_t buf_sz, int *kernel_fd, > + off_t *kernel_size) > { > int ret = -1; > int fd = 0; > @@ -110,6 +111,7 @@ int pez_prepare(const char *crude_buf, off_t buf_sz, int > *kernel_fd) > goto fail_bad_header; > } > > + *kernel_size = decompressed_size; > dbgprintf("%s: done\n", __func__); > > ret = 0; > diff --git a/kexec/kexec.c b/kexec/kexec.c > index c3b182e254e0..1edbd349c86d 100644 > --- a/kexec/kexec.c > +++ b/kexec/kexec.c > @@ -489,7 +489,7 @@ static int add_backup_segments(struct kexec_info *info, > return 0; > } > > -static char *slurp_fd(int fd, const char *filename, off_t size, off_t *nread) > +char *slurp_fd(int 
fd, const char *filename, off_t size, off_t *nread) > { > char *buf; > off_t progress; > diff --git a/kexec/kexec.h b/kexec/kexec.h > index ed3b499a80f2..093338969c57 100644 > --- a/kexec/kexec.h > +++ b/kexec/kexec.h > @@ -267,6 +267,7 @@ extern void die(const char *fmt, ...) > __attribute__ ((format (printf, 1, 2))); > extern void *xmalloc(size_t size); > extern void *xrealloc(void *ptr, size_t size); > +extern char *slurp_fd(int fd, const char *filename, off_t size, off_t > *nread); > extern char *slurp_file(const char *filename, off_t *r_size); > extern char *slurp_file_mmap(const char *filename, off_t *r_size); > extern char *slurp_file_len(const char *filename, off_t size, off_t *nread); > -- > 2.37.2 > LGTM, Reviewed-by: Pingfan Liu ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [PATCH V1 0/3] riscv: kexec: cleanup and fixups
Hello: This series was applied to riscv/linux.git (fixes) by Palmer Dabbelt : On Thu, 7 Sep 2023 18:33:01 +0800 you wrote: > Hi, > > I had posted 2 patches [1][2] for riscv/kexec, but there was no "effective" > response to them until now, so I merged them in this series with the 3rd > fixup. > > Anyway, this series contains a cleanup for riscv_kexec_relocate() and two > fixups > for KEXEC_FILE and had passed the basic kexec test in my 64bit Qemu-virt. > > [...] Here is the summary with links: - [V1,1/3] riscv: kexec: Cleanup riscv_kexec_relocate (no matching commit) - [V1,2/3] riscv: kexec: Align the kexeced kernel entry https://git.kernel.org/riscv/c/1bfb2b618d52 - [V1,3/3] riscv: kexec: Remove -fPIE for PURGATORY_CFLAGS (no matching commit) You are awesome, thank you! -- Deet-doot-dot, I am a bot. https://korg.docs.kernel.org/patchwork/pwbot.html ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [PATCH 0/1] x86/kexec: UKI support
> These are sort of "tautological" arguments. There must be some > objective reasons why this architecture was chosen instead of > other (i.e. using what already pre-exists). I think I misunderstood you in my earlier reply. I do not understand in what way you think my arguments are tautological. Can you elaborate? ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [PATCH 0/1] x86/kexec: UKI support
> These are sort of "tautological" arguments. There must be some > objective reasons why this architecture was chosen instead of > other (i.e. using what already pre-exists). You mean like your argument that the same can already be achieved with the normal EFI stub and builtin initrd/cmdline? ;) I think only reason #4 and the last paragraph in my response relate to it being pre-existing. The other reasons are actual limitations with the normal EFI stub setup. Doesn't mean that they can't be overcome, but UKIs work. I'm not sure what the initial reasons for coming up with this architecture were, I was not involved. What I can tell you is that right now it is a format that has practical advantages and that there are generic mainstream distros looking to adopt it. So having the capability to kexec them is gonna come in handy. ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [PATCH v2 0/2] x86/kexec: UKI Support
> By the way, could you provide detailed steps about how to test this > patchset so that people interested can give it a shot? Sure. 0. Build and run your kernel with my patches. 1. You're gonna need a UKI to kexec. I personally use sbctl or ukify to create them. sbctl command example (assumes you have the cmdline you want saved in a file called cmdline): sudo sbctl bundle -k /boot/vmlinuz-6.5.2 -f /boot/initrd.img-6.5.2 -c ./cmdline -s ./uki.efi ukify command example: sudo python3 ukify.py build --linux=/boot/vmlinuz-6.5.2 --initrd=/boot/initrd.img-6.5.2 --cmdline 2. If you are running in lockdown mode you'll have to sign the UKI. You can use sbctl, pesign, or sbsign for example. 3. Compile kexec-test (see links below). Simple "gcc main.c -o kexec-test" should work 4. Do the kexec load: ./kexec-test (this is equivalent to "kexec -a -l ", however that currently complains about not recognizing the format) 5. At this point it's useful to check if the loading succeeded with: "cat /sys/kernel/kexec_loaded" (should return "1") 6. Do a kexec reboot. If you are running systemd, the best way is with: "systemctl kexec". Otherwise you can try "kexec -e", however this will not shut all your services down If anyone has problems please feel free to ask. Links: sbctl: https://github.com/Foxboron/sbctl ukify: https://github.com/systemd/systemd/tree/main/src/ukify kexec-test: https://github.com/Cydox/kexec-test/ ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [PATCH 0/1] x86/kexec: UKI support
On Tue Sep 12, 2023 at 9:56 PM EEST, Jan Hendrik Farr wrote: > > What sort of bottleneck does the EFI stub have so that we need yet > > another envelope? > > Well I can come up with a few advantages of UKI compared to normal bzImage > with builtin initrd and cmdline. > > 1. You already identified this one. Using addons to adjust your cmdline It is not a benefit as this is already possible today. > 2. I can use my normal initramfs generation tooling. Just install my > compiled kernel, my distros install script will generate the > initramfs. Then I package it up as a UKI. This will be a lot more > awkward with a builtin initrd. > 3. Measured boot. You can place PCR signatures in the UKI using > systemd-measure. This will sign the expected PCR values for booting > this UKI. I think with normal bzImage this will be a lot more > difficult. If you place those PCR signatures in the builtin initrd > this will change the kernel image which means now the values you > signed no longer match (depending on how you measure the kernel; I > don't think the normal EFI stub even measures the kernel in first > place, but I could be mistaken about this) > 4. UKIs are automatically recognized by systemd-boot These are sort of "tautological" arguments. There must be some objective reasons why this architecture was chosen instead of other (i.e. using what already pre-exists). BR, Jarkko ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [PATCH 0/1] x86/kexec: UKI support
> What sort of bottleneck does the EFI stub have so that we need yet > another envelope? Well I can come up with a few advantages of UKI compared to normal bzImage with builtin initrd and cmdline. 1. You already identified this one. Using addons to adjust your cmdline 2. I can use my normal initramfs generation tooling. Just install my compiled kernel, my distros install script will generate the initramfs. Then I package it up as a UKI. This will be a lot more awkward with a builtin initrd. 3. Measured boot. You can place PCR signatures in the UKI using systemd-measure. This will sign the expected PCR values for booting this UKI. I think with normal bzImage this will be a lot more difficult. If you place those PCR signatures in the builtin initrd this will change the kernel image which means now the values you signed no longer match (depending on how you measure the kernel; I don't think the normal EFI stub even measures the kernel in first place, but I could be mistaken about this) 4. UKIs are automatically recognized by systemd-boot There's probably more reasons. The main reason for me to go with UKIs initially was the good tooling around them. You could probably overcome some of these drawbacks in the default kernel EFI stub. For example it could also get a place to put signed PCR values. And it could also do TPM measurements. However in the process all you're doing is rebuilding what already exists today in systemd-stub and the tooling around UKIs. ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [PATCHv6 1/3] powerpc/setup: Loosen the mapping between cpu logical id and its seq in dt
Hi Pingfan, kernel test robot noticed the following build errors: [auto build test ERROR on powerpc/fixes] [also build test ERROR on linus/master v6.6-rc1 next-20230912] [cannot apply to powerpc/next] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/Pingfan-Liu/powerpc-setup-Loosen-the-mapping-between-cpu-logical-id-and-its-seq-in-dt/20230911-213042 base: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git fixes patch link: https://lore.kernel.org/r/20230911131855.40738-2-piliu%40redhat.com patch subject: [PATCHv6 1/3] powerpc/setup: Loosen the mapping between cpu logical id and its seq in dt config: powerpc-randconfig-r032-20230912 (https://download.01.org/0day-ci/archive/20230913/202309130232.n2rewhbv-...@intel.com/config) compiler: clang version 17.0.0 (https://github.com/llvm/llvm-project.git 4a5ac14ee968ff0ad5d2cc1ffa0299048db4c88a) reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20230913/202309130232.n2rewhbv-...@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. 
not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot | Closes: https://lore.kernel.org/oe-kbuild-all/202309130232.n2rewhbv-...@intel.com/ All errors (new ones prefixed by >>): >> ld.lld: error: undefined symbol: boot_cpu_hwid >>> referenced by setup-common.c:0 (arch/powerpc/kernel/setup-common.c:0) >>> arch/powerpc/kernel/setup-common.o:(smp_setup_cpu_maps) in archive vmlinux.a >>> referenced by setup-common.c:0 (arch/powerpc/kernel/setup-common.c:0) >>> arch/powerpc/kernel/setup-common.o:(smp_setup_cpu_maps) in archive vmlinux.a -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [PATCH 0/1] x86/kexec: UKI support
On Tue Sep 12, 2023 at 6:32 PM EEST, Jan Hendrik Farr wrote: > >> The format itself is rather simple. It's just a PE file (as required > >> by the UEFI spec) that contains a small stub application in the .text, > >> .data, etc sections that is responsible for invoking the contained > >> kernel and initrd with the contained cmdline. The kernel image is > >> placed into a .kernel section, the initrd into a .initrd section, and > >> the cmdline into a .cmdline section in the PE executable. > > > > How does this interact with the existing EFI stub support in linux? > > It doesn't. During normal boot of a UKI the stub in it is used > (systemd-stub, see: > https://www.freedesktop.org/software/systemd/man/systemd-stub.html). > The kernel's own EFI stub will still be in the binary inside the > .linux section but not used. What sort of bottleneck does the EFI stub have so that we need yet another envelope? BR, Jarkko ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [PATCH 0/1] x86/kexec: UKI support
On Tue, Sep 12, 2023, at 12:33 PM, Jarkko Sakkinen wrote: > On Tue Sep 12, 2023 at 1:54 AM EEST, Jan Hendrik Farr wrote: >> > What the heck is UKI? >> >> UKI (Unified Kernel Image) is the kernel image + initrd + cmdline (+ >> some other optional stuff) all packaged up together as one EFI >> application. >> >> This EFI application can then be launched directly by the UEFI without >> the need for any additional stuff (or by systemd-boot). It's all self >> contained. One benefit is that this is a convenient way to distribute >> kernels all in one file. Another benefit is that the whole combination >> of kernel image, initrd, and cmdline can all be signed together so >> only that particular combination can be executed if you are using >> secure boot. > > Is this also for generic purpose distributions? I mean it is not > uncommon having to tweak the command-line in a workstation. This is for generic purpose distributions. See fedora's planned rollout: https://fedoraproject.org/wiki/Changes/Unified_Kernel_Support_Phase_1 Or Arch: https://wiki.archlinux.org/title/Unified_kernel_image There are UKI addons that help you achieve this. These are additional PE files that contain for example additional cmdline parameters. On a generic Linux distro doing secure boot you'd generally use shim, could enroll MOK and use that to sign an addon for your machine. This patch currently does not support addons. The plan would be to support them in the future though. I personally always run my own compiled kernel and build a UKI from that so I can obviously tweak the cmdline that way and sign the UKI with my own secure boot key. >> The format itself is rather simple. It's just a PE file (as required >> by the UEFI spec) that contains a small stub application in the .text, >> .data, etc sections that is responsible for invoking the contained >> kernel and initrd with the contained cmdline. 
The kernel image is >> placed into a .kernel section, the initrd into a .initrd section, and >> the cmdline into a .cmdline section in the PE executable. > > How does this interact with the existing EFI stub support in linux? It doesn't. During normal boot of a UKI the stub in it is used (systemd-stub, see: https://www.freedesktop.org/software/systemd/man/systemd-stub.html). The kernel's own EFI stub will still be in the binary inside the .linux section but not used. Now in this patch (also see v2 I already posted) obviously none of the stubs are used. ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [systemd-devel] [PATCH 0/1] x86/kexec: UKI support
On Tue Sep 12, 2023 at 2:20 AM EEST, Neal Gompa wrote: > On Mon, Sep 11, 2023 at 7:15 PM Jarkko Sakkinen wrote: > > On Sat Sep 9, 2023 at 7:18 PM EEST, Jan Hendrik Farr wrote: > > > Hello, > > > > > > this patch implements UKI support for kexec_file_load. It will require > > > support > > > in the kexec-tools userspace utility. For testing purposes the following > > > can be used: > > > https://github.com/Cydox/kexec-test/ > > > > > > There has been discussion on this topic in an issue on GitHub that is > > > linked below > > > for reference. > > > > > > > > > Some links: > > > - Related discussion: https://github.com/systemd/systemd/issues/28538 > > > - Documentation of UKIs: > > > https://uapi-group.org/specifications/specs/unified_kernel_image/ > > > > > > Jan Hendrik Farr (1): > > > x86/kexec: UKI support > > > > > > arch/x86/include/asm/kexec-uki.h | 7 ++ > > > arch/x86/include/asm/parse_pefile.h| 32 +++ > > > arch/x86/kernel/Makefile | 2 + > > > arch/x86/kernel/kexec-uki.c| 113 + > > > arch/x86/kernel/machine_kexec_64.c | 2 + > > > arch/x86/kernel/parse_pefile.c | 110 > > > crypto/asymmetric_keys/mscode_parser.c | 2 +- > > > crypto/asymmetric_keys/verify_pefile.c | 110 +++- > > > crypto/asymmetric_keys/verify_pefile.h | 16 > > > 9 files changed, 278 insertions(+), 116 deletions(-) > > > create mode 100644 arch/x86/include/asm/kexec-uki.h > > > create mode 100644 arch/x86/include/asm/parse_pefile.h > > > create mode 100644 arch/x86/kernel/kexec-uki.c > > > create mode 100644 arch/x86/kernel/parse_pefile.c > > > > > > -- > > > 2.40.1 > > > > What the heck is UKI? > > Unified Kernel Images. More details available here: > https://uapi-group.org/specifications/specs/unified_kernel_image/ > > It's a way of creating initramfs-style images as fully generic, > reproducible images that can be built server-side. You can build today a kernel with these compiled in: 1. EFI stub 2. initramfs 3. cmdline Why another way (and label 'UKI') for a pre-existing feature? 
BR, Jarkko ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [PATCH 0/1] x86/kexec: UKI support
On Tue Sep 12, 2023 at 1:54 AM EEST, Jan Hendrik Farr wrote: > > What the heck is UKI? > > UKI (Unified Kernel Image) is the kernel image + initrd + cmdline (+ > some other optional stuff) all packaged up together as one EFI > application. > > This EFI application can then be launched directly by the UEFI without > the need for any additional stuff (or by systemd-boot). It's all self > contained. One benefit is that this is a convenient way to distribute > kernels all in one file. Another benefit is that the whole combination > of kernel image, initrd, and cmdline can all be signed together so > only that particular combination can be executed if you are using > secure boot. Is this also for generic purpose distributions? I mean it is not uncommon having to tweak the command-line in a workstation. > The format itself is rather simple. It's just a PE file (as required > by the UEFI spec) that contains a small stub application in the .text, > .data, etc sections that is responsible for invoking the contained > kernel and initrd with the contained cmdline. The kernel image is > placed into a .kernel section, the initrd into a .initrd section, and > the cmdline into a .cmdline section in the PE executable. How does this interact with the existing EFI stub support in linux? > If we want to kexec a UKI we could obviously just have userspace pick > it apart and kexec it like normal. However in lockdown mode this will > only work if you sign the kernel image that is contained inside the > UKI. The problem with that is that anybody can then grab that signed > kernel and launch it with any initrd or cmdline. So instead this patch > makes the kernel do the work instead. The kernel verifies the > signature on the entire UKI and then passes its components on to the > normal kexec bzimage loader. 
> > Useful Links: > UKI format documentation: > https://uapi-group.org/specifications/specs/unified_kernel_image/ > Arch wiki: https://wiki.archlinux.org/title/Unified_kernel_image > Fedora UKI support: > https://fedoraproject.org/wiki/Changes/Unified_Kernel_Support_Phase_1 BR, Jarkko ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [PATCH v2 2/3] vmcore: allow fadump to export vmcore even if is_kdump_kernel() is false
On 11/09/23 4:01 pm, Baoquan He wrote: On 09/11/23 at 05:13pm, Michael Ellerman wrote: Hari Bathini writes: Currently, is_kdump_kernel() returns true when elfcorehdr_addr is set. While elfcorehdr_addr is set for kexec based kernel dump mechanism, alternate dump capturing methods like fadump [1] also set it to export the vmcore. Since, is_kdump_kernel() is used to restrict resources in crash dump capture kernel and such restrictions are not desirable for fadump, allow is_kdump_kernel() to be defined differently for fadump case. With that change, include is_fadump_active() check in functions is_vmcore_usable() & vmcore_unusable() to be able to export vmcore for fadump case too. ... diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h index 0f3a656293b0..de8a9fabfb6f 100644 --- a/include/linux/crash_dump.h +++ b/include/linux/crash_dump.h @@ -50,6 +50,7 @@ void vmcore_cleanup(void); #define vmcore_elf64_check_arch(x) (elf_check_arch(x) || vmcore_elf_check_arch_cross(x)) #endif +#ifndef is_kdump_kernel /* * is_kdump_kernel() checks whether this kernel is booting after a panic of * previous kernel or not. This is determined by checking if previous kernel @@ -64,6 +65,19 @@ static inline bool is_kdump_kernel(void) { return elfcorehdr_addr != ELFCORE_ADDR_MAX; } +#endif + +#ifndef is_fadump_active +/* + * If f/w assisted dump capturing mechanism (fadump), instead of kexec based + * dump capturing mechanism (kdump) is exporting the vmcore, then this function + * will be defined in arch specific code to return true, when appropriate. + */ +static inline bool is_fadump_active(void) +{ + return false; +} +#endif /* is_vmcore_usable() checks if the kernel is booting after a panic and * the vmcore region is usable. @@ -75,7 +89,8 @@ static inline bool is_kdump_kernel(void) static inline int is_vmcore_usable(void) { - return is_kdump_kernel() && elfcorehdr_addr != ELFCORE_ADDR_ERR ? 
1 : 0; + return (is_kdump_kernel() || is_fadump_active()) + && elfcorehdr_addr != ELFCORE_ADDR_ERR ? 1 : 0; } /* vmcore_unusable() marks the vmcore as unusable, @@ -84,7 +99,7 @@ static inline int is_vmcore_usable(void) static inline void vmcore_unusable(void) { - if (is_kdump_kernel()) + if (is_kdump_kernel() || is_fadump_active()) elfcorehdr_addr = ELFCORE_ADDR_ERR; } I think it would be cleaner to decouple is_vmcore_usable() and vmcore_usable() from is_kdump_kernel(). ie, make them operate solely based on the value of elforehdr_addr: static inline int is_vmcore_usable(void) { elfcorehdr_addr != ELFCORE_ADDR_ERR && \ elfcorehdr_addr != ELFCORE_ADDR_MAX; Agree. I fell into the blind corner of thinking earlier. Above change is better. Thanks for the reviews. Posted v3. - Hari ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
[PATCH v3 2/2] powerpc/fadump: make is_kdump_kernel() return false when fadump is active
Currently, is_kdump_kernel() returns true in crash dump capture kernel for both kdump and fadump crash dump capturing methods, as both these methods set elfcorehdr_addr. Some restrictions enforced for crash dump capture kernel, based on is_kdump_kernel(), are specifically meant for kdump case and not desirable for fadump - eg. IO queues restriction in device drivers. So, define is_kdump_kernel() to return false when f/w assisted dump is active. Signed-off-by: Hari Bathini --- arch/powerpc/include/asm/kexec.h | 8 ++-- arch/powerpc/kernel/crash_dump.c | 12 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index a1ddba01e7d1..e1b43aa12175 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -99,10 +99,14 @@ void relocate_new_kernel(unsigned long indirection_page, unsigned long reboot_co void kexec_copy_flush(struct kimage *image); -#if defined(CONFIG_CRASH_DUMP) && defined(CONFIG_PPC_RTAS) +#if defined(CONFIG_CRASH_DUMP) +bool is_kdump_kernel(void); +#define is_kdump_kernel is_kdump_kernel +#if defined(CONFIG_PPC_RTAS) void crash_free_reserved_phys_range(unsigned long begin, unsigned long end); #define crash_free_reserved_phys_range crash_free_reserved_phys_range -#endif +#endif /* CONFIG_PPC_RTAS */ +#endif /* CONFIG_CRASH_DUMP */ #ifdef CONFIG_KEXEC_FILE extern const struct kexec_file_ops kexec_elf64_ops; diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index 9a3b85bfc83f..2086fa6cdc25 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -19,6 +19,7 @@ #include #include #include +#include #ifdef DEBUG #include @@ -92,6 +93,17 @@ ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn, return csize; } +/* + * Return true only when kexec based kernel dump capturing method is used. 
+ * This ensures all restrictions applied for kdump case are not automatically + * applied for fadump case. + */ +bool is_kdump_kernel(void) +{ + return !is_fadump_active() && elfcorehdr_addr != ELFCORE_ADDR_MAX; +} +EXPORT_SYMBOL_GPL(is_kdump_kernel); + #ifdef CONFIG_PPC_RTAS /* * The crashkernel region will almost always overlap the RTAS region, so -- 2.41.0 ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
[PATCH v3 1/2] vmcore: remove dependency with is_kdump_kernel() for exporting vmcore
Currently, is_kdump_kernel() returns true when elfcorehdr_addr is set. While elfcorehdr_addr is set for kexec based kernel dump mechanism, alternate dump capturing methods like fadump [1] also set it to export the vmcore. Since, is_kdump_kernel() is used to restrict resources in crash dump capture kernel and such restrictions may not be desirable for fadump, allow is_kdump_kernel() to be defined differently for such scenarios. With this, is_kdump_kernel() could be false while vmcore is usable. So, remove unnecessary dependency with is_kdump_kernel(), for exporting vmcore. [1] https://docs.kernel.org/powerpc/firmware-assisted-dump.html Suggested-by: Michael Ellerman Signed-off-by: Hari Bathini --- Changes in v3: * Decoupled is_vmcore_usable() & vmcore_unusable() from is_kdump_kernel() as suggested here: https://lore.kernel.org/linuxppc-dev/ZP7si3UMVpPfYV+w@MiWiFi-R3L-srv/T/#m13ae5a7e4ba6f4d8397f0f66581832292eee3a85 include/linux/crash_dump.h | 8 +--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h index 0f3a656293b0..acc55626afdc 100644 --- a/include/linux/crash_dump.h +++ b/include/linux/crash_dump.h @@ -50,6 +50,7 @@ void vmcore_cleanup(void); #define vmcore_elf64_check_arch(x) (elf_check_arch(x) || vmcore_elf_check_arch_cross(x)) #endif +#ifndef is_kdump_kernel /* * is_kdump_kernel() checks whether this kernel is booting after a panic of * previous kernel or not. This is determined by checking if previous kernel @@ -64,6 +65,7 @@ static inline bool is_kdump_kernel(void) { return elfcorehdr_addr != ELFCORE_ADDR_MAX; } +#endif /* is_vmcore_usable() checks if the kernel is booting after a panic and * the vmcore region is usable. @@ -75,7 +77,8 @@ static inline bool is_kdump_kernel(void) static inline int is_vmcore_usable(void) { - return is_kdump_kernel() && elfcorehdr_addr != ELFCORE_ADDR_ERR ? 1 : 0; + return elfcorehdr_addr != ELFCORE_ADDR_ERR && + elfcorehdr_addr != ELFCORE_ADDR_MAX ? 
1 : 0; } /* vmcore_unusable() marks the vmcore as unusable, @@ -84,8 +87,7 @@ static inline int is_vmcore_usable(void) static inline void vmcore_unusable(void) { - if (is_kdump_kernel()) - elfcorehdr_addr = ELFCORE_ADDR_ERR; + elfcorehdr_addr = ELFCORE_ADDR_ERR; } /** -- 2.41.0 ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [PATCH V2 1/2] efi/unaccepted: Do not let /proc/vmcore try to access unaccepted memory
On 12.09.23 09:47, Adrian Hunter wrote: On 12/09/23 10:19, David Hildenbrand wrote: On 11.09.23 13:21, Adrian Hunter wrote: Support for unaccepted memory was added recently, refer commit dcdfdd40fa82 ("mm: Add support for unaccepted memory"), whereby a virtual machine may need to accept memory before it can be used. Do not let /proc/vmcore try to access unaccepted memory because it can cause the guest to fail. Oh, hold on. What are the actual side effects of this? Once we're in the kdump kernel, any guest is already dead. So failing a guest doesn't apply, no? Unaccepted Memory is used by virtual machines. In this case the guest has kexec'ed to a dump-capture kernel, so the virtual machine is still alive and running the dump-capture kernel. Ah, I got lost in TDX host semantics. So what you're saying, if we (guest) are reading unaccepted memory we will get zapped. Makes sense. -- Cheers, David / dhildenb ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [PATCH V2 1/2] efi/unaccepted: Do not let /proc/vmcore try to access unaccepted memory
On 12/09/23 10:19, David Hildenbrand wrote: > On 11.09.23 13:21, Adrian Hunter wrote: >> Support for unaccepted memory was added recently, refer commit dcdfdd40fa82 >> ("mm: Add support for unaccepted memory"), whereby a virtual machine may >> need to accept memory before it can be used. >> >> Do not let /proc/vmcore try to access unaccepted memory because it can >> cause the guest to fail. > > Oh, hold on. What are the actual side effects of this? > > Once we're in the kdump kernel, any guest is already dead. So failing a guest > doesn't apply, no? > Unaccepted Memory is used by virtual machines. In this case the guest has kexec'ed to a dump-capture kernel, so the virtual machine is still alive and running the dump-capture kernel. ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [PATCH V2 1/2] efi/unaccepted: Do not let /proc/vmcore try to access unaccepted memory
On 11.09.23 13:21, Adrian Hunter wrote: Support for unaccepted memory was added recently, refer commit dcdfdd40fa82 ("mm: Add support for unaccepted memory"), whereby a virtual machine may need to accept memory before it can be used. Do not let /proc/vmcore try to access unaccepted memory because it can cause the guest to fail. Oh, hold on. What are the actual side effects of this? Once we're in the kdump kernel, any guest is already dead. So failing a guest doesn't apply, no? -- Cheers, David / dhildenb ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [PATCH V2 1/2] efi/unaccepted: Do not let /proc/vmcore try to access unaccepted memory
On 11.09.23 13:21, Adrian Hunter wrote: Support for unaccepted memory was added recently, refer commit dcdfdd40fa82 ("mm: Add support for unaccepted memory"), whereby a virtual machine may need to accept memory before it can be used. Do not let /proc/vmcore try to access unaccepted memory because it can cause the guest to fail. For /proc/vmcore, which is read-only, this means a read or mmap of unaccepted memory will return zeros. Signed-off-by: Adrian Hunter --- [...] +static inline bool pfn_is_unaccepted_memory(unsigned long pfn) +{ + phys_addr_t paddr = pfn << PAGE_SHIFT; + + return range_contains_unaccepted_memory(paddr, paddr + PAGE_SIZE); +} + #endif /* _LINUX_MM_H */ As stated, if the relevant table is not already properly populated with information about unaccepted memory by the first kernel, this probably logically belongs in Kirill's series. Reviewed-by: David Hildenbrand -- Cheers, David / dhildenb ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [PATCH V2 2/2] proc/kcore: Do not try to access unaccepted memory
On 11.09.23 13:21, Adrian Hunter wrote: Support for unaccepted memory was added recently, refer commit dcdfdd40fa82 ("mm: Add support for unaccepted memory"), whereby a virtual machine may need to accept memory before it can be used. Do not try to access unaccepted memory because it can cause the guest to fail. For /proc/kcore, which is read-only and does not support mmap, this means a read of unaccepted memory will return zeros. Signed-off-by: Adrian Hunter --- fs/proc/kcore.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) Changes in V2: Change patch subject and commit message Do not open code pfn_is_unaccepted_memory() diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 23fc24d16b31..6422e569b080 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -546,7 +546,8 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter) * and explicitly excluded physical ranges. */ if (!page || PageOffline(page) || - is_page_hwpoison(page) || !pfn_is_ram(pfn)) { + is_page_hwpoison(page) || !pfn_is_ram(pfn) || + pfn_is_unaccepted_memory(pfn)) { if (iov_iter_zero(tsz, iter) != tsz) { ret = -EFAULT; goto out; Reviewed-by: David Hildenbrand -- Cheers, David / dhildenb ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec