The branch main has been updated by imp:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=3915ffb1c3e04b26d1506bf35d3f665b2e25a915

commit 3915ffb1c3e04b26d1506bf35d3f665b2e25a915
Author:     Warner Losh <[email protected]>
AuthorDate: 2026-06-05 05:18:40 +0000
Commit:     Warner Losh <[email protected]>
CommitDate: 2026-06-06 01:24:42 +0000

    loader.efi: Fix when staging moves late
    
    Prior to this commit, we'd compute the page tables and have the last
    entries point to the staging area. We'd then add some more metadata to
    the image and boot. This assumed the staging area didn't need to move
    for this last bit of data.
    
    However, if we go over the staging limit, when we copyin new data, we
    grow the staging area, usually be moving it to a lower address.  This
    overage usually happens when we're loading modules and so things work
    out nicely. Sometimes we're close to the limit, and we need to do this
    growing inside bi_load, after we've computed the page table, making the
    page table wrong, and the code we jump to random rather than the btext
    routine we normally start at.
    
    To fix this, move computation of the table (but not its allocation) to
    after bi_load, but before we call the trampoline.
    
    This problem was most observed when loading microcode for many peole,
    but Gleb reproduced the error with a set of modules that didn't include
    ucode.
    
    This bug hunt was greatly assisted by Claude who looked at the crash
    from the EFI boot loader and surmised that we weren't jumping to the
    code we thought we were jumping to. After inspecting the code, I asked
    claude how corruption could happen (I thought overwriting the page
    table), but claude notice the possibility that staging might change
    after we computed the page table, and this fix is the result. Claude
    didn't suggest a diff, but did provide many helpful clues that lead me
    to this fix.
    
    PR: 294630
    Reviewed by: kib (prior version)
    Sponsored by: Netflix
    MFC After: insta per re@ request
    Differential Revision: https://reviews.freebsd.org/D57462
---
 stand/efi/loader/arch/amd64/elf64_freebsd.c | 51 ++++++++++++++-----------
 stand/efi/loader/arch/i386/elf64_freebsd.c  | 58 +++++++++++++++++------------
 stand/efi/loader/bootinfo.c                 | 18 ++++++++-
 3 files changed, 81 insertions(+), 46 deletions(-)

diff --git a/stand/efi/loader/arch/amd64/elf64_freebsd.c 
b/stand/efi/loader/arch/amd64/elf64_freebsd.c
index 35bd4d6c1419..62716d7b1369 100644
--- a/stand/efi/loader/arch/amd64/elf64_freebsd.c
+++ b/stand/efi/loader/arch/amd64/elf64_freebsd.c
@@ -89,7 +89,7 @@ elf64_exec(struct preloaded_file *fp)
        Elf_Ehdr                *ehdr;
        vm_offset_t             modulep, kernend, trampcode, trampstack;
        int                     err, i;
-       bool                    copy_auto;
+       bool                    copy_auto, needs_pt4;
 
        copy_auto = copy_staging == COPY_STAGING_AUTO;
        if (copy_auto)
@@ -156,6 +156,7 @@ elf64_exec(struct preloaded_file *fp)
                        PT2[i] = (pd_entry_t)i * M(2);
                        PT2[i] |= PG_V | PG_RW | PG_PS;
                }
+               needs_pt4 = false;
        } else {
                PT4 = (pml4_entry_t *)G(4);
                err = BS->AllocatePages(AllocateMaxAddress, EfiLoaderData, 9,
@@ -167,7 +168,35 @@ elf64_exec(struct preloaded_file *fp)
                                copy_staging = COPY_STAGING_AUTO;
                        return (ENOMEM);
                }
+               needs_pt4 = true;
+       }
+
+       printf("%scopying staging tramp %p PT4 %p\n",
+           copy_staging == COPY_STAGING_ENABLE ? "" : "not ",
+           trampoline, PT4);
+       printf("Start @ 0x%lx ...\n", ehdr->e_entry);
+
+       /*
+        * we have to cleanup here because net_cleanup() doesn't work after
+        * we call ExitBootServices
+        */
+       dev_cleanup();
 
+       efi_time_fini();
+       err = bi_load(fp->f_args, &modulep, &kernend, true);
+       if (err != 0) {
+               efi_time_init();
+               if (copy_auto)
+                       copy_staging = COPY_STAGING_AUTO;
+               return (err);
+       }
+
+       /*
+        * staging might move in bi_load because we automatiaclly move when we
+        * copy data in. At this point, staging can't move anymore, so create
+        * PT4 with the correct value.
+        */
+       if (needs_pt4) {
                bzero(PT4, 9 * EFI_PAGE_SIZE);
 
                PT3_l = &PT4[NPML4EPG * 1];
@@ -204,26 +233,6 @@ elf64_exec(struct preloaded_file *fp)
                }
        }
 
-       printf("staging %#lx (%scopying) tramp %p PT4 %p\n",
-           staging, copy_staging == COPY_STAGING_ENABLE ? "" : "not ",
-           trampoline, PT4);
-       printf("Start @ 0x%lx ...\n", ehdr->e_entry);
-
-       /*
-        * we have to cleanup here because net_cleanup() doesn't work after
-        * we call ExitBootServices
-        */
-       dev_cleanup();
-
-       efi_time_fini();
-       err = bi_load(fp->f_args, &modulep, &kernend, true);
-       if (err != 0) {
-               efi_time_init();
-               if (copy_auto)
-                       copy_staging = COPY_STAGING_AUTO;
-               return (err);
-       }
-
        trampoline(trampstack, copy_staging == COPY_STAGING_ENABLE ?
            efi_copy_finish : efi_copy_finish_nop, kernend, modulep,
            PT4, ehdr->e_entry);
diff --git a/stand/efi/loader/arch/i386/elf64_freebsd.c 
b/stand/efi/loader/arch/i386/elf64_freebsd.c
index 22cdd685ea9b..f3f8c91e0275 100644
--- a/stand/efi/loader/arch/i386/elf64_freebsd.c
+++ b/stand/efi/loader/arch/i386/elf64_freebsd.c
@@ -99,6 +99,7 @@ elf64_exec(struct preloaded_file *fp)
        struct user_segment_descriptor *gdt;
        vm_offset_t             modulep, kernend, trampstack;
        int i;
+       bool needs_pt4;
 
        switch (copy_staging) {
        case COPY_STAGING_ENABLE:
@@ -199,10 +200,8 @@ elf64_exec(struct preloaded_file *fp)
                         */
                        PT2[i] = (i * M(2)) | PG_V | PG_RW | PG_PS;
                }
+               needs_pt4 = false;
        } else {
-               pdpt_entry_t    *PT3_l, *PT3_u;
-               pd_entry_t      *PT2_l0, *PT2_l1, *PT2_l2, *PT2_l3, *PT2_u0, 
*PT2_u1;
-
                err = BS->AllocatePages(AllocateAnyPages, EfiLoaderData,
                    EFI_SIZE_TO_PAGES(512 * 9 * sizeof(uint64_t)), &ptr);
                if (EFI_ERROR(err)) {
@@ -213,6 +212,38 @@ elf64_exec(struct preloaded_file *fp)
                }
                PT4 = (pml4_entry_t *)(uintptr_t)ptr;
 
+               needs_pt4 = true;
+       }
+
+       printf("%scopying staging tramp %p PT4 %p GDT %p\n"
+           "Start @ %#llx ...\n",
+           type == AllocateMaxAddress ? "" : "not ", trampoline, PT4, gdt,
+           ehdr->e_entry
+       );
+
+
+       /*
+        * we have to cleanup here because net_cleanup() doesn't work after
+        * we call ExitBootServices
+        */
+       dev_cleanup();
+
+       efi_time_fini();
+       err = bi_load(fp->f_args, &modulep, &kernend, true);
+       if (err != 0) {
+               efi_time_init();
+               return (err);
+       }
+
+       /*
+        * staging might move in bi_load because we automatiaclly move when we
+        * copy data in. At this point, staging can't move anymore, so create
+        * PT4 with the correct value.
+        */
+       if (needs_pt4) {
+               pdpt_entry_t    *PT3_l, *PT3_u;
+               pd_entry_t      *PT2_l0, *PT2_l1, *PT2_l2, *PT2_l3, *PT2_u0, 
*PT2_u1;
+
                PT3_l = &PT4[512];
                PT3_u = &PT3_l[512];
                PT2_l0 = &PT3_u[512];
@@ -245,27 +276,6 @@ elf64_exec(struct preloaded_file *fp)
                }
        }
 
-       printf(
-           "staging %#llx (%scopying) tramp %p PT4 %p GDT %p\n"
-           "Start @ %#llx ...\n", staging,
-           type == AllocateMaxAddress ? "" : "not ", trampoline, PT4, gdt,
-           ehdr->e_entry
-       );
-
-
-       /*
-        * we have to cleanup here because net_cleanup() doesn't work after
-        * we call ExitBootServices
-        */
-       dev_cleanup();
-
-       efi_time_fini();
-       err = bi_load(fp->f_args, &modulep, &kernend, true);
-       if (err != 0) {
-               efi_time_init();
-               return (err);
-       }
-
        trampoline(trampstack, type == AllocateMaxAddress ? efi_copy_finish :
            efi_copy_finish_nop, kernend, modulep, PT4, gdtr, ehdr->e_entry);
 
diff --git a/stand/efi/loader/bootinfo.c b/stand/efi/loader/bootinfo.c
index fc87a0bf52fb..14e16ed5df81 100644
--- a/stand/efi/loader/bootinfo.c
+++ b/stand/efi/loader/bootinfo.c
@@ -209,6 +209,16 @@ bi_load_efi_data(struct preloaded_file *kfp, bool exit_bs)
        }
 #endif
 
+#if defined(__amd64__) || defined(__i386__)
+       /*
+        * Staging can't move after this point, so report the final value before
+        * we try to exit boot services below. The metadata added is added to
+        * the malloced arena that we setup when we started and doesn't interact
+        * with boot services.
+        */
+       printf("staging %#jx\n", (uintmax_t)staging);
+#endif
+
        do_vmap = true;
        efi_novmap = getenv("efi_disable_vmap");
        if (efi_novmap != NULL)
@@ -298,14 +308,20 @@ bi_load_efi_data(struct preloaded_file *kfp, bool exit_bs)
         * loader.conf(5). By default we will setup the virtual
         * map entries.
         */
-
        if (do_vmap)
                efi_do_vmap(mm, sz, dsz, mmver);
+
+       /*
+        * Add the memory map to the metadata. addmetadata copies the data into
+        * the malloc arena, so we can safely free the memory map pages after.
+        * Or could if boot services was still running.
+        */
        efihdr->memory_size = sz;
        efihdr->descriptor_size = dsz;
        efihdr->descriptor_version = mmver;
        file_addmetadata(kfp, MODINFOMD_EFI_MAP, efisz + sz,
            efihdr);
+       /* BS->FreePages(addr, pages); */
 
        return (0);
 }

Reply via email to